From e69d49ad57e289df0aaaf2d71c79bdcf9c9ccfa8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 26 Jul 2024 05:55:47 +0000 Subject: [PATCH 001/322] chore(deps): bump crate-ci/typos from 1.23.3 to 1.23.5 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.23.3 to 1.23.5. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.23.3...v1.23.5) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index f86d56878..aad73bc13 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -112,7 +112,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Check Spelling - uses: crate-ci/typos@v1.23.3 + uses: crate-ci/typos@v1.23.5 build: name: Build From 5bf41d2c72cd89c2c47d2ac9f6d15548f114185b Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Fri, 19 Jul 2024 12:06:27 +0800 Subject: [PATCH 002/322] fix(ci): add doc test Signed-off-by: lxl66566 --- .github/workflows/pull_request.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index aad73bc13..17ab73206 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -24,13 +24,13 @@ jobs: name: "Normal", args: "", rustflags: "", - test: "llvm-cov nextest --all-features --workspace --codecov --output-path codecov.info", + test: "llvm-cov nextest --all-features --workspace --codecov --output-path codecov.info && cargo test --doc", } - { name: "Madsim", args: "--package=simulation", 
rustflags: "--cfg madsim", - test: "nextest run --package=simulation", + test: "nextest run --package=simulation && cargo test -p simulation --doc", } name: Tests ${{ matrix.config.name }} steps: From 8032dfe2f36c10ddbf11efb8dc272b7739fef2d2 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Fri, 19 Jul 2024 14:07:40 +0800 Subject: [PATCH 003/322] refactor(client): LeaseClient::* Signed-off-by: lxl66566 refactor(client): LeaseClient::revoke Signed-off-by: lxl66566 refactor(client): LeaseClient::keep_alive Signed-off-by: lxl66566 refactor(client): LeaseClient::time_to_live Signed-off-by: lxl66566 --- crates/xline-client/examples/lease.rs | 23 +-- crates/xline-client/src/clients/lease.rs | 66 +++++---- crates/xline-client/src/clients/lock.rs | 15 +- crates/xline-client/src/types/lease.rs | 132 +----------------- crates/xline-client/tests/it/lease.rs | 45 ++---- crates/xline/tests/it/lease_test.rs | 17 +-- crates/xlinectl/src/command/lease/grant.rs | 17 +-- .../xlinectl/src/command/lease/keep_alive.rs | 18 +-- crates/xlinectl/src/command/lease/revoke.rs | 13 +- .../xlinectl/src/command/lease/timetolive.rs | 15 +- 10 files changed, 92 insertions(+), 269 deletions(-) diff --git a/crates/xline-client/examples/lease.rs b/crates/xline-client/examples/lease.rs index 24f1babe5..56e5dd012 100644 --- a/crates/xline-client/examples/lease.rs +++ b/crates/xline-client/examples/lease.rs @@ -1,10 +1,5 @@ use anyhow::Result; -use xline_client::{ - types::lease::{ - LeaseGrantRequest, LeaseKeepAliveRequest, LeaseRevokeRequest, LeaseTimeToLiveRequest, - }, - Client, ClientOptions, -}; +use xline_client::{Client, ClientOptions}; #[tokio::main] async fn main() -> Result<()> { @@ -16,24 +11,20 @@ async fn main() -> Result<()> { .lease_client(); // grant new lease - let resp1 = client.grant(LeaseGrantRequest::new(60)).await?; - let resp2 = client.grant(LeaseGrantRequest::new(60)).await?; + let resp1 = client.grant(60, None).await?; + let resp2 = client.grant(60, None).await?; let lease_id1 = 
resp1.id; let lease_id2 = resp2.id; println!("lease id 1: {}", lease_id1); println!("lease id 2: {}", lease_id2); // get the ttl of lease1 - let resp = client - .time_to_live(LeaseTimeToLiveRequest::new(lease_id1)) - .await?; + let resp = client.time_to_live(lease_id1, false).await?; println!("remaining ttl: {}", resp.ttl); // keep alive lease2 - let (mut keeper, mut stream) = client - .keep_alive(LeaseKeepAliveRequest::new(lease_id2)) - .await?; + let (mut keeper, mut stream) = client.keep_alive(lease_id2).await?; if let Some(resp) = stream.message().await? { println!("new ttl: {}", resp.ttl); @@ -48,8 +39,8 @@ async fn main() -> Result<()> { } // revoke the leases - let _resp = client.revoke(LeaseRevokeRequest::new(lease_id1)).await?; - let _resp = client.revoke(LeaseRevokeRequest::new(lease_id2)).await?; + let _resp = client.revoke(lease_id1).await?; + let _resp = client.revoke(lease_id2).await?; Ok(()) } diff --git a/crates/xline-client/src/clients/lease.rs b/crates/xline-client/src/clients/lease.rs index b09577744..42b7a1e18 100644 --- a/crates/xline-client/src/clients/lease.rs +++ b/crates/xline-client/src/clients/lease.rs @@ -10,10 +10,7 @@ use xlineapi::{ use crate::{ error::{Result, XlineClientError}, lease_gen::LeaseIdGenerator, - types::lease::{ - LeaseGrantRequest, LeaseKeepAliveRequest, LeaseKeeper, LeaseRevokeRequest, - LeaseTimeToLiveRequest, - }, + types::lease::LeaseKeeper, AuthService, CurpClient, }; @@ -70,6 +67,9 @@ impl LeaseClient { /// within a given time to live period. All keys attached to the lease will be expired and /// deleted if the lease expires. Each expired key generates a delete event in the event history. /// + /// `ttl` is the advisory time-to-live in seconds. Expired lease will return -1. + /// `id` is the requested ID for the lease. If ID is set to `None` or 0, the lessor chooses an ID. 
+ /// /// # Errors /// /// This function will return an error if the inner CURP client encountered a propose failure @@ -77,7 +77,7 @@ impl LeaseClient { /// # Examples /// /// ```no_run - /// use xline_client::{types::lease::LeaseGrantRequest, Client, ClientOptions}; + /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -88,19 +88,22 @@ impl LeaseClient { /// .await? /// .lease_client(); /// - /// let resp = client.grant(LeaseGrantRequest::new(60)).await?; + /// let resp = client.grant(60, None).await?; /// println!("lease id: {}", resp.id); /// /// Ok(()) /// } /// ``` #[inline] - pub async fn grant(&self, mut request: LeaseGrantRequest) -> Result { - if request.inner.id == 0 { - request.inner.id = self.id_gen.next(); + pub async fn grant(&self, ttl: i64, id: Option) -> Result { + let mut id = id.unwrap_or_default(); + if id == 0 { + id = self.id_gen.next(); } - let request = RequestWrapper::from(xlineapi::LeaseGrantRequest::from(request)); - let cmd = Command::new(request); + let cmd = Command::new(RequestWrapper::from(xlineapi::LeaseGrantRequest { + ttl, + id, + })); let (cmd_res, _sync_res) = self .curp_client .propose(&cmd, self.token.as_ref(), true) @@ -110,6 +113,8 @@ impl LeaseClient { /// Revokes a lease. All keys attached to the lease will expire and be deleted. /// + /// `id` is the lease ID to revoke. When the ID is revoked, all associated keys will be deleted. 
+ /// /// # Errors /// /// This function will return an error if the inner RPC client encountered a propose failure @@ -117,7 +122,7 @@ impl LeaseClient { /// # Examples /// /// ```no_run - /// use xline_client::{types::lease::LeaseRevokeRequest, Client, ClientOptions}; + /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -130,20 +135,25 @@ impl LeaseClient { /// /// // granted a lease id 1 /// - /// let _resp = client.revoke(LeaseRevokeRequest::new(1)).await?; + /// let _resp = client.revoke(1).await?; /// /// Ok(()) /// } /// ``` #[inline] - pub async fn revoke(&mut self, request: LeaseRevokeRequest) -> Result { - let res = self.lease_client.lease_revoke(request.inner).await?; + pub async fn revoke(&mut self, id: i64) -> Result { + let res = self + .lease_client + .lease_revoke(xlineapi::LeaseRevokeRequest { id }) + .await?; Ok(res.into_inner()) } /// Keeps the lease alive by streaming keep alive requests from the client /// to the server and streaming keep alive responses from the server to the client. /// + /// `id` is the lease ID for the lease to keep alive. + /// /// # Errors /// /// This function will return an error if the inner RPC client encountered a propose failure @@ -151,7 +161,7 @@ impl LeaseClient { /// # Examples /// /// ```no_run - /// use xline_client::{types::lease::LeaseKeepAliveRequest, Client, ClientOptions}; + /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -164,7 +174,7 @@ impl LeaseClient { /// /// // granted a lease id 1 /// - /// let (mut keeper, mut stream) = client.keep_alive(LeaseKeepAliveRequest::new(1)).await?; + /// let (mut keeper, mut stream) = client.keep_alive(1).await?; /// /// if let Some(resp) = stream.message().await? 
{ /// println!("new ttl: {}", resp.ttl); @@ -178,12 +188,12 @@ impl LeaseClient { #[inline] pub async fn keep_alive( &mut self, - request: LeaseKeepAliveRequest, + id: i64, ) -> Result<(LeaseKeeper, Streaming)> { let (mut sender, receiver) = channel::(100); sender - .try_send(request.into()) + .try_send(xlineapi::LeaseKeepAliveRequest { id }) .map_err(|e| XlineClientError::LeaseError(e.to_string()))?; let mut stream = self @@ -192,7 +202,7 @@ impl LeaseClient { .await? .into_inner(); - let id = match stream.message().await? { + let resp_id = match stream.message().await? { Some(resp) => resp.id, None => { return Err(XlineClientError::LeaseError(String::from( @@ -201,11 +211,14 @@ impl LeaseClient { } }; - Ok((LeaseKeeper::new(id, sender), stream)) + Ok((LeaseKeeper::new(resp_id, sender), stream)) } /// Retrieves lease information. /// + /// `id` is the lease ID for the lease, + /// `keys` is true to query all the keys attached to this lease. + /// /// # Errors /// /// This function will return an error if the inner RPC client encountered a propose failure @@ -213,7 +226,7 @@ impl LeaseClient { /// # Examples /// /// ```no_run - /// use xline_client::{types::lease::LeaseTimeToLiveRequest, Client, ClientOptions}; + /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -226,7 +239,7 @@ impl LeaseClient { /// /// // granted a lease id 1 /// - /// let resp = client.time_to_live(LeaseTimeToLiveRequest::new(1)).await?; + /// let resp = client.time_to_live(1, false).await?; /// /// println!("remaining ttl: {}", resp.ttl); /// @@ -234,13 +247,10 @@ impl LeaseClient { /// } /// ``` #[inline] - pub async fn time_to_live( - &mut self, - request: LeaseTimeToLiveRequest, - ) -> Result { + pub async fn time_to_live(&mut self, id: i64, keys: bool) -> Result { Ok(self .lease_client - .lease_time_to_live(xlineapi::LeaseTimeToLiveRequest::from(request)) + .lease_time_to_live(xlineapi::LeaseTimeToLiveRequest { id, keys }) .await? 
.into_inner()) } diff --git a/crates/xline-client/src/clients/lock.rs b/crates/xline-client/src/clients/lock.rs index d5761f6ad..322c4ab09 100644 --- a/crates/xline-client/src/clients/lock.rs +++ b/crates/xline-client/src/clients/lock.rs @@ -16,11 +16,7 @@ use crate::{ clients::{lease::LeaseClient, watch::WatchClient, DEFAULT_SESSION_TTL}, error::{Result, XlineClientError}, lease_gen::LeaseIdGenerator, - types::{ - kv::TxnRequest as KvTxnRequest, - lease::{LeaseGrantRequest, LeaseKeepAliveRequest}, - watch::WatchRequest, - }, + types::{kv::TxnRequest as KvTxnRequest, watch::WatchRequest}, CurpClient, }; @@ -130,19 +126,14 @@ impl Xutex { let lease_id = if let Some(id) = lease_id { id } else { - let lease_response = client - .lease_client - .grant(LeaseGrantRequest::new(ttl)) - .await?; + let lease_response = client.lease_client.grant(ttl, None).await?; lease_response.id }; let mut lease_client = client.lease_client.clone(); let keep_alive = Some(tokio::spawn(async move { /// The renew interval factor of which value equals 60% of one second. const RENEW_INTERVAL_FACTOR: u64 = 600; - let (mut keeper, mut stream) = lease_client - .keep_alive(LeaseKeepAliveRequest::new(lease_id)) - .await?; + let (mut keeper, mut stream) = lease_client.keep_alive(lease_id).await?; loop { keeper.keep_alive()?; if let Some(resp) = stream.message().await? 
{ diff --git a/crates/xline-client/src/types/lease.rs b/crates/xline-client/src/types/lease.rs index fbf39fad6..03fa80cc2 100644 --- a/crates/xline-client/src/types/lease.rs +++ b/crates/xline-client/src/types/lease.rs @@ -38,137 +38,7 @@ impl LeaseKeeper { #[inline] pub fn keep_alive(&mut self) -> Result<()> { self.sender - .try_send(LeaseKeepAliveRequest::new(self.id).into()) + .try_send(xlineapi::LeaseKeepAliveRequest { id: self.id }) .map_err(|e| XlineClientError::LeaseError(e.to_string())) } } - -/// Request for `LeaseGrant` -#[derive(Debug, PartialEq)] -pub struct LeaseGrantRequest { - /// Inner request - pub(crate) inner: xlineapi::LeaseGrantRequest, -} - -impl LeaseGrantRequest { - /// Creates a new `LeaseGrantRequest` - /// - /// `ttl` is the advisory time-to-live in seconds. Expired lease will return -1. - #[inline] - #[must_use] - pub fn new(ttl: i64) -> Self { - Self { - inner: xlineapi::LeaseGrantRequest { - ttl, - ..Default::default() - }, - } - } - - /// `id` is the requested ID for the lease. If ID is set to 0, the lessor chooses an ID. - #[inline] - #[must_use] - pub fn with_id(mut self, id: i64) -> Self { - self.inner.id = id; - self - } -} - -impl From for xlineapi::LeaseGrantRequest { - #[inline] - fn from(req: LeaseGrantRequest) -> Self { - req.inner - } -} - -/// Request for `LeaseRevoke` -#[derive(Debug, PartialEq)] -pub struct LeaseRevokeRequest { - /// Inner request - pub(crate) inner: xlineapi::LeaseRevokeRequest, -} - -impl LeaseRevokeRequest { - /// Creates a new `LeaseRevokeRequest` - /// - /// `id` is the lease ID to revoke. When the ID is revoked, all associated keys will be deleted. 
- #[inline] - #[must_use] - pub fn new(id: i64) -> Self { - Self { - inner: xlineapi::LeaseRevokeRequest { id }, - } - } -} - -impl From for xlineapi::LeaseRevokeRequest { - #[inline] - fn from(req: LeaseRevokeRequest) -> Self { - req.inner - } -} - -/// Request for `LeaseKeepAlive` -#[derive(Debug, PartialEq)] -pub struct LeaseKeepAliveRequest { - /// Inner request - pub(crate) inner: xlineapi::LeaseKeepAliveRequest, -} - -impl LeaseKeepAliveRequest { - /// Creates a new `LeaseKeepAliveRequest` - /// - /// `id` is the lease ID for the lease to keep alive. - #[inline] - #[must_use] - pub fn new(id: i64) -> Self { - Self { - inner: xlineapi::LeaseKeepAliveRequest { id }, - } - } -} - -impl From for xlineapi::LeaseKeepAliveRequest { - #[inline] - fn from(req: LeaseKeepAliveRequest) -> Self { - req.inner - } -} - -/// Request for `LeaseTimeToLive` -#[derive(Debug, PartialEq)] -pub struct LeaseTimeToLiveRequest { - /// Inner request - pub(crate) inner: xlineapi::LeaseTimeToLiveRequest, -} - -impl LeaseTimeToLiveRequest { - /// Creates a new `LeaseTimeToLiveRequest` - /// - /// `id` is the lease ID for the lease. - #[inline] - #[must_use] - pub fn new(id: i64) -> Self { - Self { - inner: xlineapi::LeaseTimeToLiveRequest { - id, - ..Default::default() - }, - } - } - - /// `keys` is true to query all the keys attached to this lease. 
- #[inline] - #[must_use] - pub fn with_keys(mut self, keys: bool) -> Self { - self.inner.keys = keys; - self - } -} - -impl From for xlineapi::LeaseTimeToLiveRequest { - #[inline] - fn from(req: LeaseTimeToLiveRequest) -> Self { - req.inner - } -} diff --git a/crates/xline-client/tests/it/lease.rs b/crates/xline-client/tests/it/lease.rs index 4bab8caba..445162eb3 100644 --- a/crates/xline-client/tests/it/lease.rs +++ b/crates/xline-client/tests/it/lease.rs @@ -1,9 +1,4 @@ -use xline_client::{ - error::Result, - types::lease::{ - LeaseGrantRequest, LeaseKeepAliveRequest, LeaseRevokeRequest, LeaseTimeToLiveRequest, - }, -}; +use xline_client::error::Result; use super::common::get_cluster_client; @@ -12,10 +7,10 @@ async fn grant_revoke_should_success_in_normal_path() -> Result<()> { let (_cluster, client) = get_cluster_client().await.unwrap(); let mut client = client.lease_client(); - let resp = client.grant(LeaseGrantRequest::new(123)).await?; + let resp = client.grant(123, None).await?; assert_eq!(resp.ttl, 123); let id = resp.id; - client.revoke(LeaseRevokeRequest::new(id)).await?; + client.revoke(id).await?; Ok(()) } @@ -25,18 +20,18 @@ async fn keep_alive_should_success_in_normal_path() -> Result<()> { let (_cluster, client) = get_cluster_client().await.unwrap(); let mut client = client.lease_client(); - let resp = client.grant(LeaseGrantRequest::new(60)).await?; + let resp = client.grant(60, None).await?; assert_eq!(resp.ttl, 60); let id = resp.id; - let (mut keeper, mut stream) = client.keep_alive(LeaseKeepAliveRequest::new(id)).await?; + let (mut keeper, mut stream) = client.keep_alive(id).await?; keeper.keep_alive()?; let resp = stream.message().await?.unwrap(); assert_eq!(resp.id, keeper.id()); assert_eq!(resp.ttl, 60); - client.revoke(LeaseRevokeRequest::new(id)).await?; + client.revoke(id).await?; Ok(()) } @@ -47,19 +42,15 @@ async fn time_to_live_ttl_is_consistent_in_normal_path() -> Result<()> { let mut client = client.lease_client(); let lease_id = 
200; - let resp = client - .grant(LeaseGrantRequest::new(60).with_id(lease_id)) - .await?; + let resp = client.grant(60, Some(lease_id)).await?; assert_eq!(resp.ttl, 60); assert_eq!(resp.id, lease_id); - let resp = client - .time_to_live(LeaseTimeToLiveRequest::new(lease_id)) - .await?; + let resp = client.time_to_live(lease_id, false).await?; assert_eq!(resp.id, lease_id); assert_eq!(resp.granted_ttl, 60); - client.revoke(LeaseRevokeRequest::new(lease_id)).await?; + client.revoke(lease_id).await?; Ok(()) } @@ -73,21 +64,15 @@ async fn leases_should_include_granted_in_normal_path() -> Result<()> { let (_cluster, client) = get_cluster_client().await.unwrap(); let mut client = client.lease_client(); - let resp = client - .grant(LeaseGrantRequest::new(60).with_id(lease1)) - .await?; + let resp = client.grant(60, Some(lease1)).await?; assert_eq!(resp.ttl, 60); assert_eq!(resp.id, lease1); - let resp = client - .grant(LeaseGrantRequest::new(60).with_id(lease2)) - .await?; + let resp = client.grant(60, Some(lease2)).await?; assert_eq!(resp.ttl, 60); assert_eq!(resp.id, lease2); - let resp = client - .grant(LeaseGrantRequest::new(60).with_id(lease3)) - .await?; + let resp = client.grant(60, Some(lease3)).await?; assert_eq!(resp.ttl, 60); assert_eq!(resp.id, lease3); @@ -97,9 +82,9 @@ async fn leases_should_include_granted_in_normal_path() -> Result<()> { assert!(leases.contains(&lease2)); assert!(leases.contains(&lease3)); - client.revoke(LeaseRevokeRequest::new(lease1)).await?; - client.revoke(LeaseRevokeRequest::new(lease2)).await?; - client.revoke(LeaseRevokeRequest::new(lease3)).await?; + client.revoke(lease1).await?; + client.revoke(lease2).await?; + client.revoke(lease3).await?; Ok(()) } diff --git a/crates/xline/tests/it/lease_test.rs b/crates/xline/tests/it/lease_test.rs index 392e59027..df1bda72e 100644 --- a/crates/xline/tests/it/lease_test.rs +++ b/crates/xline/tests/it/lease_test.rs @@ -3,10 +3,7 @@ use std::{error::Error, time::Duration}; use 
test_macros::abort_on_panic; use tracing::info; use xline_test_utils::{ - types::{ - kv::{PutOptions, RangeRequest}, - lease::{LeaseGrantRequest, LeaseKeepAliveRequest}, - }, + types::kv::{PutOptions, RangeRequest}, Client, ClientOptions, Cluster, }; @@ -17,10 +14,7 @@ async fn test_lease_expired() -> Result<(), Box> { cluster.start().await; let client = cluster.client().await; - let res = client - .lease_client() - .grant(LeaseGrantRequest::new(1)) - .await?; + let res = client.lease_client().grant(1, None).await?; let lease_id = res.id; assert!(lease_id > 0); @@ -52,10 +46,7 @@ async fn test_lease_keep_alive() -> Result<(), Box> { let non_leader_ep = cluster.get_client_url(1); let client = cluster.client().await; - let res = client - .lease_client() - .grant(LeaseGrantRequest::new(1)) - .await?; + let res = client.lease_client().grant(1, None).await?; let lease_id = res.id; assert!(lease_id > 0); @@ -74,7 +65,7 @@ async fn test_lease_keep_alive() -> Result<(), Box> { let mut c = Client::connect(vec![non_leader_ep], ClientOptions::default()) .await? 
.lease_client(); - let (mut keeper, mut stream) = c.keep_alive(LeaseKeepAliveRequest::new(lease_id)).await?; + let (mut keeper, mut stream) = c.keep_alive(lease_id).await?; let handle = tokio::spawn(async move { loop { tokio::time::sleep(Duration::from_millis(500)).await; diff --git a/crates/xlinectl/src/command/lease/grant.rs b/crates/xlinectl/src/command/lease/grant.rs index 3b3107434..fe452e775 100644 --- a/crates/xlinectl/src/command/lease/grant.rs +++ b/crates/xlinectl/src/command/lease/grant.rs @@ -1,5 +1,5 @@ use clap::{arg, value_parser, ArgMatches, Command}; -use xline_client::{error::Result, types::lease::LeaseGrantRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; @@ -11,15 +11,15 @@ pub(super) fn command() -> Command { } /// Build request from matches -pub(super) fn build_request(matches: &ArgMatches) -> LeaseGrantRequest { +pub(super) fn build_request(matches: &ArgMatches) -> i64 { let ttl = matches.get_one::("ttl").expect("required"); - LeaseGrantRequest::new(*ttl) + *ttl } /// Execute the command pub(super) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { - let request = build_request(matches); - let resp = client.lease_client().grant(request).await?; + let ttl = build_request(matches); + let resp = client.lease_client().grant(ttl, None).await?; resp.print(); Ok(()) @@ -30,14 +30,11 @@ mod tests { use super::*; use crate::test_case_struct; - test_case_struct!(LeaseGrantRequest); + test_case_struct!(i64); #[test] fn command_parse_should_be_valid() { - let test_cases = vec![TestCase::new( - vec!["grant", "100"], - Some(LeaseGrantRequest::new(100)), - )]; + let test_cases = vec![TestCase::new(vec!["grant", "100"], Some(100))]; for case in test_cases { case.run_test(); diff --git a/crates/xlinectl/src/command/lease/keep_alive.rs b/crates/xlinectl/src/command/lease/keep_alive.rs index 67a208b21..fddfbab8a 100644 --- a/crates/xlinectl/src/command/lease/keep_alive.rs +++ 
b/crates/xlinectl/src/command/lease/keep_alive.rs @@ -5,7 +5,7 @@ use tokio::signal::ctrl_c; use tonic::Streaming; use xline_client::{ error::{Result, XlineClientError}, - types::lease::{LeaseKeepAliveRequest, LeaseKeeper}, + types::lease::LeaseKeeper, Client, }; use xlineapi::LeaseKeepAliveResponse; @@ -21,9 +21,9 @@ pub(super) fn command() -> Command { } /// Build request from matches -pub(super) fn build_request(matches: &ArgMatches) -> LeaseKeepAliveRequest { +pub(super) fn build_request(matches: &ArgMatches) -> i64 { let lease_id = matches.get_one::("leaseId").expect("required"); - LeaseKeepAliveRequest::new(*lease_id) + *lease_id } /// Execute the command @@ -80,19 +80,13 @@ mod tests { use super::*; use crate::test_case_struct; - test_case_struct!(LeaseKeepAliveRequest); + test_case_struct!(i64); #[test] fn command_parse_should_be_valid() { let test_cases = vec![ - TestCase::new( - vec!["keep_alive", "123"], - Some(LeaseKeepAliveRequest::new(123)), - ), - TestCase::new( - vec!["keep_alive", "456", "--once"], - Some(LeaseKeepAliveRequest::new(456)), - ), + TestCase::new(vec!["keep_alive", "123"], Some(123)), + TestCase::new(vec!["keep_alive", "456", "--once"], Some(456)), ]; for case in test_cases { diff --git a/crates/xlinectl/src/command/lease/revoke.rs b/crates/xlinectl/src/command/lease/revoke.rs index 1ccbdaf4a..12c9b6cce 100644 --- a/crates/xlinectl/src/command/lease/revoke.rs +++ b/crates/xlinectl/src/command/lease/revoke.rs @@ -1,5 +1,5 @@ use clap::{arg, value_parser, ArgMatches, Command}; -use xline_client::{error::Result, types::lease::LeaseRevokeRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; @@ -11,9 +11,9 @@ pub(super) fn command() -> Command { } /// Build request from matches -pub(super) fn build_request(matches: &ArgMatches) -> LeaseRevokeRequest { +pub(super) fn build_request(matches: &ArgMatches) -> i64 { let lease_id = matches.get_one::("leaseId").expect("required"); - 
LeaseRevokeRequest::new(*lease_id) + *lease_id } /// Execute the command @@ -30,14 +30,11 @@ mod tests { use super::*; use crate::test_case_struct; - test_case_struct!(LeaseRevokeRequest); + test_case_struct!(i64); #[test] fn command_parse_should_be_valid() { - let test_cases = vec![TestCase::new( - vec!["revoke", "123"], - Some(LeaseRevokeRequest::new(123)), - )]; + let test_cases = vec![TestCase::new(vec!["revoke", "123"], Some(123))]; for case in test_cases { case.run_test(); diff --git a/crates/xlinectl/src/command/lease/timetolive.rs b/crates/xlinectl/src/command/lease/timetolive.rs index b9bad3262..2860285ff 100644 --- a/crates/xlinectl/src/command/lease/timetolive.rs +++ b/crates/xlinectl/src/command/lease/timetolive.rs @@ -1,5 +1,5 @@ use clap::{arg, value_parser, ArgMatches, Command}; -use xline_client::{error::Result, types::lease::LeaseTimeToLiveRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; @@ -11,15 +11,15 @@ pub(super) fn command() -> Command { } /// Build request from matches -pub(super) fn build_request(matches: &ArgMatches) -> LeaseTimeToLiveRequest { +pub(super) fn build_request(matches: &ArgMatches) -> i64 { let lease_id = matches.get_one::("leaseId").expect("required"); - LeaseTimeToLiveRequest::new(*lease_id) + *lease_id } /// Execute the command pub(super) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { let req = build_request(matches); - let resp = client.lease_client().time_to_live(req).await?; + let resp = client.lease_client().time_to_live(req, false).await?; resp.print(); Ok(()) @@ -30,14 +30,11 @@ mod tests { use super::*; use crate::test_case_struct; - test_case_struct!(LeaseTimeToLiveRequest); + test_case_struct!(i64); #[test] fn command_parse_should_be_valid() { - let test_cases = vec![TestCase::new( - vec!["timetolive", "123"], - Some(LeaseTimeToLiveRequest::new(123)), - )]; + let test_cases = vec![TestCase::new(vec!["timetolive", "123"], Some(123))]; for 
case in test_cases { case.run_test(); From 1b870c55930edc4259fce51d855e1668710f67ef Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 06:03:11 +0000 Subject: [PATCH 004/322] chore(deps): bump assert_cmd from 2.0.14 to 2.0.15 Bumps [assert_cmd](https://github.com/assert-rs/assert_cmd) from 2.0.14 to 2.0.15. - [Changelog](https://github.com/assert-rs/assert_cmd/blob/master/CHANGELOG.md) - [Commits](https://github.com/assert-rs/assert_cmd/compare/v2.0.14...v2.0.15) --- updated-dependencies: - dependency-name: assert_cmd dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- crates/test-macros/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f87cda7bb..c41d8bfef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -111,9 +111,9 @@ checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" [[package]] name = "assert_cmd" -version = "2.0.14" +version = "2.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed72493ac66d5804837f480ab3766c72bdfab91a65e565fc54fa9e42db0073a8" +checksum = "bc65048dd435533bb1baf2ed9956b9a278fbfdcf90301b39ee117f06c0199d37" dependencies = [ "anstyle", "bstr", diff --git a/crates/test-macros/Cargo.toml b/crates/test-macros/Cargo.toml index 0516322ad..a59381409 100644 --- a/crates/test-macros/Cargo.toml +++ b/crates/test-macros/Cargo.toml @@ -20,4 +20,4 @@ tokio = { version = "1", features = ["rt-multi-thread", "macros"] } workspace-hack = { version = "0.1", path = "../../workspace-hack" } [dev-dependencies] -assert_cmd = "2.0.14" +assert_cmd = "2.0.15" From 3095d48b455f06c0dafcb5d48d0e0d2a4c2e2db3 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Fri, 12 Jul 2024 09:45:48 +0800 Subject: [PATCH 005/322] refactor(client)!: refactor AuthClient::user_add Signed-off-by: lxl66566 --- 
crates/xline-client/examples/auth_user.rs | 8 ++-- crates/xline-client/src/clients/auth.rs | 46 +++++++++++++++-------- crates/xline-client/src/types/auth.rs | 37 ------------------ crates/xline-client/tests/it/auth.rs | 8 ++-- crates/xline-test-utils/src/lib.rs | 8 ++-- crates/xline/tests/it/auth_test.rs | 14 ++----- crates/xlinectl/src/command/user/add.rs | 43 ++++++++++++++------- 7 files changed, 74 insertions(+), 90 deletions(-) diff --git a/crates/xline-client/examples/auth_user.rs b/crates/xline-client/examples/auth_user.rs index 416135834..01482c1f0 100644 --- a/crates/xline-client/examples/auth_user.rs +++ b/crates/xline-client/examples/auth_user.rs @@ -1,8 +1,8 @@ use anyhow::Result; use xline_client::{ types::auth::{ - AuthUserAddRequest, AuthUserChangePasswordRequest, AuthUserDeleteRequest, - AuthUserGetRequest, AuthUserGrantRoleRequest, AuthUserRevokeRoleRequest, + AuthUserChangePasswordRequest, AuthUserDeleteRequest, AuthUserGetRequest, + AuthUserGrantRoleRequest, AuthUserRevokeRoleRequest, }, Client, ClientOptions, }; @@ -17,8 +17,8 @@ async fn main() -> Result<()> { .auth_client(); // add user - client.user_add(AuthUserAddRequest::new("user1")).await?; - client.user_add(AuthUserAddRequest::new("user2")).await?; + client.user_add("user1", "", true).await?; + client.user_add("user2", "", true).await?; // change user1's password to "123" client diff --git a/crates/xline-client/src/clients/auth.rs b/crates/xline-client/src/clients/auth.rs index 6413ec9fe..cf6d8fc4f 100644 --- a/crates/xline-client/src/clients/auth.rs +++ b/crates/xline-client/src/clients/auth.rs @@ -15,7 +15,7 @@ use crate::{ error::{Result, XlineClientError}, types::auth::{ AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, - AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, AuthUserAddRequest, + AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, AuthUserChangePasswordRequest, AuthUserDeleteRequest, AuthUserGetRequest, AuthUserGrantRoleRequest, 
AuthUserRevokeRoleRequest, }, @@ -207,15 +207,19 @@ impl AuthClient { } /// Add an user. + /// Set password to empty String if you want to create a user without password. /// /// # Errors /// - /// This function will return an error if the inner CURP client encountered a propose failure + /// This function will return an error if the inner CURP client encountered a propose failure; + /// + /// Returns `XlineClientError::InvalidArgs` if the user name is empty, + /// or the password is empty when `allow_no_password` is false. /// /// # Examples /// /// ```no_run - /// use xline_client::{types::auth::AuthUserAddRequest, Client, ClientOptions}; + /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -226,33 +230,43 @@ impl AuthClient { /// .await? /// .auth_client(); /// - /// client.user_add(AuthUserAddRequest::new("user1")).await?; + /// client.user_add("user1", "", true).await?; /// Ok(()) /// } ///``` #[inline] - pub async fn user_add(&self, mut request: AuthUserAddRequest) -> Result { - if request.inner.name.is_empty() { + pub async fn user_add( + &self, + name: impl Into, + password: impl AsRef, + allow_no_password: bool, + ) -> Result { + let name = name.into(); + let password: &str = password.as_ref(); + if name.is_empty() { return Err(XlineClientError::InvalidArgs(String::from( "user name is empty", ))); } - let need_password = request - .inner - .options - .as_ref() - .map_or(true, |o| !o.no_password); - if need_password && request.inner.password.is_empty() { + if !allow_no_password && password.is_empty() { return Err(XlineClientError::InvalidArgs(String::from( "password is required but not provided", ))); } - let hashed_password = hash_password(request.inner.password.as_bytes()).map_err(|err| { + let hashed_password = hash_password(password.as_bytes()).map_err(|err| { XlineClientError::InternalError(format!("Failed to hash password: {err}")) })?; - request.inner.hashed_password = hashed_password; - 
request.inner.password = String::new(); - self.handle_req(request.inner, false).await + let options = allow_no_password.then_some(xlineapi::UserAddOptions { no_password: true }); + self.handle_req( + xlineapi::AuthUserAddRequest { + name, + password: String::new(), + hashed_password, + options, + }, + false, + ) + .await } /// Gets the user info by the user name. diff --git a/crates/xline-client/src/types/auth.rs b/crates/xline-client/src/types/auth.rs index b51299bce..e874c0165 100644 --- a/crates/xline-client/src/types/auth.rs +++ b/crates/xline-client/src/types/auth.rs @@ -8,43 +8,6 @@ pub use xlineapi::{ AuthenticateResponse, Type as PermissionType, }; -/// Request for `Authenticate` -#[derive(Debug, PartialEq)] -pub struct AuthUserAddRequest { - /// Inner request - pub(crate) inner: xlineapi::AuthUserAddRequest, -} - -impl AuthUserAddRequest { - /// Creates a new `AuthUserAddRequest`. - #[inline] - pub fn new(user_name: impl Into) -> Self { - Self { - inner: xlineapi::AuthUserAddRequest { - name: user_name.into(), - options: Some(xlineapi::UserAddOptions { no_password: true }), - ..Default::default() - }, - } - } - - /// Sets the password. 
- #[inline] - #[must_use] - pub fn with_pwd(mut self, password: impl Into) -> Self { - self.inner.password = password.into(); - self.inner.options = Some(xlineapi::UserAddOptions { no_password: false }); - self - } -} - -impl From for xlineapi::AuthUserAddRequest { - #[inline] - fn from(req: AuthUserAddRequest) -> Self { - req.inner - } -} - /// Request for `AuthUserGet` #[derive(Debug, PartialEq)] pub struct AuthUserGetRequest { diff --git a/crates/xline-client/tests/it/auth.rs b/crates/xline-client/tests/it/auth.rs index 83a191691..8fc09c0b5 100644 --- a/crates/xline-client/tests/it/auth.rs +++ b/crates/xline-client/tests/it/auth.rs @@ -3,7 +3,7 @@ use xline_client::{ error::Result, types::auth::{ AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, - AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, AuthUserAddRequest, + AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, AuthUserChangePasswordRequest, AuthUserDeleteRequest, AuthUserGetRequest, AuthUserGrantRoleRequest, AuthUserRevokeRoleRequest, Permission, PermissionType, }, @@ -128,9 +128,7 @@ async fn user_operations_should_success_in_normal_path() -> Result<()> { let password1 = "pwd1"; let password2 = "pwd2"; - client - .user_add(AuthUserAddRequest::new(name1).with_pwd(password1)) - .await?; + client.user_add(name1, password1, false).await?; client.user_get(AuthUserGetRequest::new(name1)).await?; let user_list_resp = client.user_list().await?; @@ -160,7 +158,7 @@ async fn user_role_operations_should_success_in_normal_path() -> Result<()> { let role1 = "role1"; let role2 = "role2"; - client.user_add(AuthUserAddRequest::new(name1)).await?; + client.user_add(name1, "", true).await?; client.role_add(AuthRoleAddRequest::new(role1)).await?; client.role_add(AuthRoleAddRequest::new(role2)).await?; diff --git a/crates/xline-test-utils/src/lib.rs b/crates/xline-test-utils/src/lib.rs index 624b7f32b..ffd5258b1 100644 --- a/crates/xline-test-utils/src/lib.rs +++ 
b/crates/xline-test-utils/src/lib.rs @@ -15,8 +15,8 @@ use utils::config::{ }; use xline::server::XlineServer; use xline_client::types::auth::{ - AuthRoleAddRequest, AuthRoleGrantPermissionRequest, AuthUserAddRequest, - AuthUserGrantRoleRequest, Permission, PermissionType, + AuthRoleAddRequest, AuthRoleGrantPermissionRequest, AuthUserGrantRoleRequest, Permission, + PermissionType, }; pub use xline_client::{clients, types, Client, ClientOptions}; @@ -348,9 +348,7 @@ pub async fn set_user( range_end: &[u8], ) -> Result<(), Box> { let client = client.auth_client(); - client - .user_add(AuthUserAddRequest::new(name).with_pwd(password)) - .await?; + client.user_add(name, password, false).await?; client.role_add(AuthRoleAddRequest::new(role)).await?; client .user_grant_role(AuthUserGrantRoleRequest::new(name, role)) diff --git a/crates/xline/tests/it/auth_test.rs b/crates/xline/tests/it/auth_test.rs index efcdf9e7f..4ab044460 100644 --- a/crates/xline/tests/it/auth_test.rs +++ b/crates/xline/tests/it/auth_test.rs @@ -8,7 +8,7 @@ use utils::config::{ use xline_test_utils::{ enable_auth, set_user, types::{ - auth::{AuthRoleDeleteRequest, AuthUserAddRequest, AuthUserGetRequest}, + auth::{AuthRoleDeleteRequest, AuthUserGetRequest}, kv::RangeRequest, }, Client, ClientOptions, Cluster, @@ -73,9 +73,7 @@ async fn test_auth_revision() -> Result<(), Box> { client.kv_client().put("foo", "bar", None).await?; - let user_add_resp = auth_client - .user_add(AuthUserAddRequest::new("root").with_pwd("123")) - .await?; + let user_add_resp = auth_client.user_add("root", "123", false).await?; let auth_rev = user_add_resp.header.unwrap().revision; assert_eq!(auth_rev, 2); @@ -181,16 +179,12 @@ async fn test_no_root_user_do_admin_ops() -> Result<(), Box> { .await? 
.auth_client(); - let result = user_client - .user_add(AuthUserAddRequest::new("u2").with_pwd("123")) - .await; + let result = user_client.user_add("u2", "123", false).await; assert!( result.is_err(), "normal user should not allow to add user when auth is enabled: {result:?}" ); - let result = root_client - .user_add(AuthUserAddRequest::new("u2").with_pwd("123")) - .await; + let result = root_client.user_add("u2", "123", false).await; assert!(result.is_ok(), "root user failed to add user: {result:?}"); Ok(()) diff --git a/crates/xlinectl/src/command/user/add.rs b/crates/xlinectl/src/command/user/add.rs index 5c7071972..e133b9430 100644 --- a/crates/xlinectl/src/command/user/add.rs +++ b/crates/xlinectl/src/command/user/add.rs @@ -1,7 +1,14 @@ +use crate::utils::printer::Printer; use clap::{arg, ArgMatches, Command}; -use xline_client::{error::Result, types::auth::AuthUserAddRequest, Client}; +use xline_client::{error::Result, Client}; -use crate::utils::printer::Printer; +/// Parameters of `AuthClient::user_add`. +/// +/// The first parameter is the name of the user. +/// The second parameter is the password of the user. If the user has no password, set it to empty string. +/// The third parameter is whether the user could has no password. +/// If set, the user is allowed to have no password. 
+type AuthUserAddRequest = (String, String, bool); /// Definition of `add` command pub(super) fn command() -> Command { @@ -9,7 +16,7 @@ pub(super) fn command() -> Command { .about("Add a new user") .arg(arg!( "The name of the user")) .arg( - arg!([password] "Password of the user") + arg!([password] "Password of the user, set to empty string if the user has no password") .required_if_eq("no_password", "false") .required_unless_present("no_password"), ) @@ -18,20 +25,30 @@ pub(super) fn command() -> Command { /// Build request from matches pub(super) fn build_request(matches: &ArgMatches) -> AuthUserAddRequest { - let name = matches.get_one::("name").expect("required"); + let name = matches + .get_one::("name") + .expect("required") + .to_owned(); let no_password = matches.get_flag("no_password"); - if no_password { - AuthUserAddRequest::new(name) - } else { - let password = matches.get_one::("password").expect("required"); - AuthUserAddRequest::new(name).with_pwd(password) - } + + ( + name, + if no_password { + String::new() + } else { + matches + .get_one::("password") + .expect("required") + .to_owned() + }, + no_password, + ) } /// Execute the command pub(super) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { let req = build_request(matches); - let resp = client.auth_client().user_add(req).await?; + let resp = client.auth_client().user_add(req.0, req.1, req.2).await?; resp.print(); Ok(()) @@ -49,11 +66,11 @@ mod tests { let test_cases = vec![ TestCase::new( vec!["add", "JaneSmith", "password123"], - Some(AuthUserAddRequest::new("JaneSmith").with_pwd("password123")), + Some(("JaneSmith".into(), "password123".into(), false)), ), TestCase::new( vec!["add", "--no_password", "BobJohnson"], - Some(AuthUserAddRequest::new("BobJohnson")), + Some(("BobJohnson".into(), String::new(), true)), ), ]; From 4e8b2881d8b95dd23cea6fc0bc4452e519885b57 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Sun, 14 Jul 2024 15:45:53 +0800 Subject: [PATCH 006/322] 
refactor(client)!: refactor AuthClient::user_get Signed-off-by: lxl66566 --- crates/xline-client/examples/auth_user.rs | 6 +++--- crates/xline-client/src/clients/auth.rs | 13 ++++++------ crates/xline-client/src/types/auth.rs | 26 ----------------------- crates/xline-client/tests/it/auth.rs | 13 +++++------- crates/xline/tests/it/auth_test.rs | 9 +++----- crates/xlinectl/src/command/user/get.rs | 19 ++++++----------- 6 files changed, 24 insertions(+), 62 deletions(-) diff --git a/crates/xline-client/examples/auth_user.rs b/crates/xline-client/examples/auth_user.rs index 01482c1f0..bc9f8acd2 100644 --- a/crates/xline-client/examples/auth_user.rs +++ b/crates/xline-client/examples/auth_user.rs @@ -1,8 +1,8 @@ use anyhow::Result; use xline_client::{ types::auth::{ - AuthUserChangePasswordRequest, AuthUserDeleteRequest, AuthUserGetRequest, - AuthUserGrantRoleRequest, AuthUserRevokeRoleRequest, + AuthUserChangePasswordRequest, AuthUserDeleteRequest, AuthUserGrantRoleRequest, + AuthUserRevokeRoleRequest, }, Client, ClientOptions, }; @@ -37,7 +37,7 @@ async fn main() -> Result<()> { let resp = client.user_list().await?; for user in resp.users { println!("user: {}", user); - let get_resp = client.user_get(AuthUserGetRequest::new(user)).await?; + let get_resp = client.user_get(user).await?; println!("roles:"); for role in get_resp.roles.iter() { print!("{} ", role); diff --git a/crates/xline-client/src/clients/auth.rs b/crates/xline-client/src/clients/auth.rs index cf6d8fc4f..a4fd7955e 100644 --- a/crates/xline-client/src/clients/auth.rs +++ b/crates/xline-client/src/clients/auth.rs @@ -16,8 +16,8 @@ use crate::{ types::auth::{ AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - AuthUserChangePasswordRequest, AuthUserDeleteRequest, AuthUserGetRequest, - AuthUserGrantRoleRequest, AuthUserRevokeRoleRequest, + AuthUserChangePasswordRequest, AuthUserDeleteRequest, AuthUserGrantRoleRequest, + 
AuthUserRevokeRoleRequest, }, AuthService, CurpClient, }; @@ -278,7 +278,7 @@ impl AuthClient { /// # Examples /// /// ```no_run - /// use xline_client::{types::auth::AuthUserGetRequest, Client, ClientOptions}; + /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -289,7 +289,7 @@ impl AuthClient { /// .await? /// .auth_client(); /// - /// let resp = client.user_get(AuthUserGetRequest::new("user")).await?; + /// let resp = client.user_get("user").await?; /// /// for role in resp.roles { /// print!("{} ", role); @@ -299,8 +299,9 @@ impl AuthClient { /// } ///``` #[inline] - pub async fn user_get(&self, request: AuthUserGetRequest) -> Result { - self.handle_req(request.inner, true).await + pub async fn user_get(&self, name: impl Into) -> Result { + self.handle_req(xlineapi::AuthUserGetRequest { name: name.into() }, true) + .await } /// Lists all users. diff --git a/crates/xline-client/src/types/auth.rs b/crates/xline-client/src/types/auth.rs index e874c0165..8fc5ed504 100644 --- a/crates/xline-client/src/types/auth.rs +++ b/crates/xline-client/src/types/auth.rs @@ -8,32 +8,6 @@ pub use xlineapi::{ AuthenticateResponse, Type as PermissionType, }; -/// Request for `AuthUserGet` -#[derive(Debug, PartialEq)] -pub struct AuthUserGetRequest { - /// Inner request - pub(crate) inner: xlineapi::AuthUserGetRequest, -} - -impl AuthUserGetRequest { - /// Creates a new `AuthUserGetRequest`. 
- #[inline] - pub fn new(user_name: impl Into) -> Self { - Self { - inner: xlineapi::AuthUserGetRequest { - name: user_name.into(), - }, - } - } -} - -impl From for xlineapi::AuthUserGetRequest { - #[inline] - fn from(req: AuthUserGetRequest) -> Self { - req.inner - } -} - /// Request for `AuthUserDelete` #[derive(Debug, PartialEq)] pub struct AuthUserDeleteRequest { diff --git a/crates/xline-client/tests/it/auth.rs b/crates/xline-client/tests/it/auth.rs index 8fc09c0b5..4962f2ae0 100644 --- a/crates/xline-client/tests/it/auth.rs +++ b/crates/xline-client/tests/it/auth.rs @@ -4,8 +4,8 @@ use xline_client::{ types::auth::{ AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - AuthUserChangePasswordRequest, AuthUserDeleteRequest, AuthUserGetRequest, - AuthUserGrantRoleRequest, AuthUserRevokeRoleRequest, Permission, PermissionType, + AuthUserChangePasswordRequest, AuthUserDeleteRequest, AuthUserGrantRoleRequest, + AuthUserRevokeRoleRequest, Permission, PermissionType, }, }; @@ -129,7 +129,7 @@ async fn user_operations_should_success_in_normal_path() -> Result<()> { let password2 = "pwd2"; client.user_add(name1, password1, false).await?; - client.user_get(AuthUserGetRequest::new(name1)).await?; + client.user_get(name1).await?; let user_list_resp = client.user_list().await?; assert!(user_list_resp.users.contains(&name1.to_string())); @@ -141,10 +141,7 @@ async fn user_operations_should_success_in_normal_path() -> Result<()> { client .user_delete(AuthUserDeleteRequest::new(name1)) .await?; - client - .user_get(AuthUserGetRequest::new(name1)) - .await - .unwrap_err(); + client.user_get(name1).await.unwrap_err(); Ok(()) } @@ -169,7 +166,7 @@ async fn user_role_operations_should_success_in_normal_path() -> Result<()> { .user_grant_role(AuthUserGrantRoleRequest::new(name1, role2)) .await?; - let user_get_resp = client.user_get(AuthUserGetRequest::new(name1)).await?; + let user_get_resp = 
client.user_get(name1).await?; assert_eq!( user_get_resp.roles, vec![role1.to_owned(), role2.to_owned()] diff --git a/crates/xline/tests/it/auth_test.rs b/crates/xline/tests/it/auth_test.rs index 4ab044460..c72a598da 100644 --- a/crates/xline/tests/it/auth_test.rs +++ b/crates/xline/tests/it/auth_test.rs @@ -7,10 +7,7 @@ use utils::config::{ }; use xline_test_utils::{ enable_auth, set_user, - types::{ - auth::{AuthRoleDeleteRequest, AuthUserGetRequest}, - kv::RangeRequest, - }, + types::{auth::AuthRoleDeleteRequest, kv::RangeRequest}, Client, ClientOptions, Cluster, }; @@ -146,12 +143,12 @@ async fn test_role_delete() -> Result<(), Box> { let client = cluster.client().await; let auth_client = client.auth_client(); set_user(client, "u", "123", "r", b"foo", &[]).await?; - let user = auth_client.user_get(AuthUserGetRequest::new("u")).await?; + let user = auth_client.user_get("u").await?; assert_eq!(user.roles.len(), 1); auth_client .role_delete(AuthRoleDeleteRequest::new("r")) .await?; - let user = auth_client.user_get(AuthUserGetRequest::new("u")).await?; + let user = auth_client.user_get("u").await?; assert_eq!(user.roles.len(), 0); Ok(()) diff --git a/crates/xlinectl/src/command/user/get.rs b/crates/xlinectl/src/command/user/get.rs index d9247741b..cff34524f 100644 --- a/crates/xlinectl/src/command/user/get.rs +++ b/crates/xlinectl/src/command/user/get.rs @@ -1,9 +1,5 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{ - error::Result, - types::auth::{AuthRoleGetRequest, AuthUserGetRequest}, - Client, -}; +use xline_client::{error::Result, types::auth::AuthRoleGetRequest, Client}; use crate::utils::printer::Printer; @@ -16,9 +12,9 @@ pub(super) fn command() -> Command { } /// Build request from matches -pub(super) fn build_request(matches: &ArgMatches) -> AuthUserGetRequest { +pub(super) fn build_request(matches: &ArgMatches) -> String { let name = matches.get_one::("name").expect("required"); - AuthUserGetRequest::new(name.as_str()) + name.to_owned() } 
/// Execute the command @@ -50,18 +46,15 @@ mod tests { use super::*; use crate::test_case_struct; - test_case_struct!(AuthUserGetRequest); + test_case_struct!(String); #[test] fn command_parse_should_be_valid() { let test_cases = vec![ - TestCase::new( - vec!["get", "JohnDoe"], - Some(AuthUserGetRequest::new("JohnDoe")), - ), + TestCase::new(vec!["get", "JohnDoe"], Some("JohnDoe".into())), TestCase::new( vec!["get", "--detail", "JaneSmith"], - Some(AuthUserGetRequest::new("JaneSmith")), + Some("JaneSmith".into()), ), ]; From 0658707ff36c1689f2f2eba7dcbd629567dddc69 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Sun, 14 Jul 2024 16:06:01 +0800 Subject: [PATCH 007/322] refactor(client)!: refactor AuthClient::user_delete Signed-off-by: lxl66566 --- crates/xline-client/examples/auth_user.rs | 11 +++------ crates/xline-client/src/clients/auth.rs | 19 +++++----------- crates/xline-client/src/types/auth.rs | 26 ---------------------- crates/xline-client/tests/it/auth.rs | 8 +++---- crates/xlinectl/src/command/user/delete.rs | 10 ++++----- 5 files changed, 16 insertions(+), 58 deletions(-) diff --git a/crates/xline-client/examples/auth_user.rs b/crates/xline-client/examples/auth_user.rs index bc9f8acd2..bcfcfc0f6 100644 --- a/crates/xline-client/examples/auth_user.rs +++ b/crates/xline-client/examples/auth_user.rs @@ -1,8 +1,7 @@ use anyhow::Result; use xline_client::{ types::auth::{ - AuthUserChangePasswordRequest, AuthUserDeleteRequest, AuthUserGrantRoleRequest, - AuthUserRevokeRoleRequest, + AuthUserChangePasswordRequest, AuthUserGrantRoleRequest, AuthUserRevokeRoleRequest, }, Client, ClientOptions, }; @@ -54,12 +53,8 @@ async fn main() -> Result<()> { .await?; // delete users - client - .user_delete(AuthUserDeleteRequest::new("user1")) - .await?; - client - .user_delete(AuthUserDeleteRequest::new("user2")) - .await?; + client.user_delete("user1").await?; + client.user_delete("user2").await?; Ok(()) } diff --git a/crates/xline-client/src/clients/auth.rs 
b/crates/xline-client/src/clients/auth.rs index a4fd7955e..ce0289428 100644 --- a/crates/xline-client/src/clients/auth.rs +++ b/crates/xline-client/src/clients/auth.rs @@ -16,8 +16,7 @@ use crate::{ types::auth::{ AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - AuthUserChangePasswordRequest, AuthUserDeleteRequest, AuthUserGrantRoleRequest, - AuthUserRevokeRoleRequest, + AuthUserChangePasswordRequest, AuthUserGrantRoleRequest, AuthUserRevokeRoleRequest, }, AuthService, CurpClient, }; @@ -359,23 +358,15 @@ impl AuthClient { /// .await? /// .auth_client(); /// - /// // add the user - /// - /// let resp = client.user_list().await?; - /// - /// for user in resp.users { - /// println!("user: {}", user); - /// } + /// let resp = client.user_delete("user").await?; /// /// Ok(()) /// } ///``` #[inline] - pub async fn user_delete( - &self, - request: AuthUserDeleteRequest, - ) -> Result { - self.handle_req(request.inner, false).await + pub async fn user_delete(&self, name: impl Into) -> Result { + self.handle_req(xlineapi::AuthUserDeleteRequest { name: name.into() }, false) + .await } /// Change password for an user. diff --git a/crates/xline-client/src/types/auth.rs b/crates/xline-client/src/types/auth.rs index 8fc5ed504..042e60bf2 100644 --- a/crates/xline-client/src/types/auth.rs +++ b/crates/xline-client/src/types/auth.rs @@ -8,32 +8,6 @@ pub use xlineapi::{ AuthenticateResponse, Type as PermissionType, }; -/// Request for `AuthUserDelete` -#[derive(Debug, PartialEq)] -pub struct AuthUserDeleteRequest { - /// Inner request - pub(crate) inner: xlineapi::AuthUserDeleteRequest, -} - -impl AuthUserDeleteRequest { - /// Creates a new `AuthUserDeleteRequest`. 
- #[inline] - pub fn new(user_name: impl Into) -> Self { - Self { - inner: xlineapi::AuthUserDeleteRequest { - name: user_name.into(), - }, - } - } -} - -impl From for xlineapi::AuthUserDeleteRequest { - #[inline] - fn from(req: AuthUserDeleteRequest) -> Self { - req.inner - } -} - /// Request for `AuthUserChangePassword` #[derive(Debug, PartialEq)] pub struct AuthUserChangePasswordRequest { diff --git a/crates/xline-client/tests/it/auth.rs b/crates/xline-client/tests/it/auth.rs index 4962f2ae0..0902deb3d 100644 --- a/crates/xline-client/tests/it/auth.rs +++ b/crates/xline-client/tests/it/auth.rs @@ -4,8 +4,8 @@ use xline_client::{ types::auth::{ AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - AuthUserChangePasswordRequest, AuthUserDeleteRequest, AuthUserGrantRoleRequest, - AuthUserRevokeRoleRequest, Permission, PermissionType, + AuthUserChangePasswordRequest, AuthUserGrantRoleRequest, AuthUserRevokeRoleRequest, + Permission, PermissionType, }, }; @@ -138,9 +138,7 @@ async fn user_operations_should_success_in_normal_path() -> Result<()> { .user_change_password(AuthUserChangePasswordRequest::new(name1, password2)) .await?; - client - .user_delete(AuthUserDeleteRequest::new(name1)) - .await?; + client.user_delete(name1).await?; client.user_get(name1).await.unwrap_err(); Ok(()) diff --git a/crates/xlinectl/src/command/user/delete.rs b/crates/xlinectl/src/command/user/delete.rs index 1f170c833..f848702dc 100644 --- a/crates/xlinectl/src/command/user/delete.rs +++ b/crates/xlinectl/src/command/user/delete.rs @@ -1,5 +1,5 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{error::Result, types::auth::AuthUserDeleteRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; @@ -11,9 +11,9 @@ pub(super) fn command() -> Command { } /// Build request from matches -pub(super) fn build_request(matches: &ArgMatches) -> AuthUserDeleteRequest { 
+pub(super) fn build_request(matches: &ArgMatches) -> String { let name = matches.get_one::("name").expect("required"); - AuthUserDeleteRequest::new(name) + name.to_owned() } /// Execute the command @@ -30,13 +30,13 @@ mod tests { use super::*; use crate::test_case_struct; - test_case_struct!(AuthUserDeleteRequest); + test_case_struct!(String); #[test] fn command_parse_should_be_valid() { let test_cases = vec![TestCase::new( vec!["delete", "JohnDoe"], - Some(AuthUserDeleteRequest::new("JohnDoe")), + Some("JohnDoe".into()), )]; for case in test_cases { From 1a3b973269c4ebc1d5fc9112aaf45e7ef5585519 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 30 Jul 2024 18:53:39 +0800 Subject: [PATCH 008/322] refactor: use a custom `OptionalU64` to replace Option Make it competible with gRPC implementations that do not support the `optional` keyword Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/common | 2 +- crates/curp/src/client/mod.rs | 15 +++++++----- crates/curp/src/client/retry.rs | 10 ++++++-- crates/curp/src/client/state.rs | 6 ++++- crates/curp/src/client/stream.rs | 5 +++- crates/curp/src/client/tests.rs | 28 +++++++++++----------- crates/curp/src/rpc/mod.rs | 29 +++++++++++++++++++++-- crates/curp/tests/it/common/curp_group.rs | 4 ++-- crates/simulation/src/curp_group.rs | 2 +- 9 files changed, 71 insertions(+), 30 deletions(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 7e2813c48..2d81c8f0b 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 7e2813c48513235e87e64b9f23fe933c9a13cec4 +Subproject commit 2d81c8f0b167ad962eeb8c4c55e5ee2a14eb98e6 diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 92aa8c4ae..740509c56 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -106,13 +106,16 @@ pub trait ClientApi { async fn fetch_leader_id(&self, linearizable: bool) -> 
Result { if linearizable { let resp = self.fetch_cluster(true).await?; - return Ok(resp.leader_id.unwrap_or_else(|| { - unreachable!("linearizable fetch cluster should return a leader id") - })); + return Ok(resp + .leader_id + .unwrap_or_else(|| { + unreachable!("linearizable fetch cluster should return a leader id") + }) + .into()); } let resp = self.fetch_cluster(false).await?; if let Some(id) = resp.leader_id { - return Ok(id); + return Ok(id.into()); } debug!("no leader id in FetchClusterResponse, try to send linearizable request"); // fallback to linearizable fetch @@ -285,8 +288,8 @@ impl ClientBuilder { match r { Ok(r) => { self.cluster_version = Some(r.cluster_version); - if let Some(id) = r.leader_id { - self.leader_state = Some((id, r.term)); + if let Some(ref id) = r.leader_id { + self.leader_state = Some((id.into(), r.term)); } self.all_members = if self.is_raw_curp { Some(r.into_peer_urls()) diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 9c716341b..80e5d515a 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -174,8 +174,14 @@ where } // update the leader state if got Redirect - CurpError::Redirect(Redirect { leader_id, term }) => { - let _ig = self.inner.update_leader(leader_id, term).await; + CurpError::Redirect(Redirect { + ref leader_id, + term, + }) => { + let _ig = self + .inner + .update_leader(leader_id.as_ref().map(Into::into), term) + .await; } } diff --git a/crates/curp/src/client/state.rs b/crates/curp/src/client/state.rs index 390169581..074550145 100644 --- a/crates/curp/src/client/state.rs +++ b/crates/curp/src/client/state.rs @@ -242,7 +242,11 @@ impl State { res: &FetchClusterResponse, ) -> Result<(), tonic::transport::Error> { let mut state = self.mutable.write().await; - if !self.check_and_update_leader_inner(&mut state, res.leader_id, res.term) { + if !self.check_and_update_leader_inner( + &mut state, + res.leader_id.as_ref().map(Into::into), + res.term, + ) { 
return Ok(()); } if state.cluster_version == res.cluster_version { diff --git a/crates/curp/src/client/stream.rs b/crates/curp/src/client/stream.rs index 30dca8f88..a15c7b3c6 100644 --- a/crates/curp/src/client/stream.rs +++ b/crates/curp/src/client/stream.rs @@ -76,7 +76,10 @@ impl Streaming { #[allow(clippy::wildcard_enum_match_arm)] match err { CurpError::Redirect(Redirect { leader_id, term }) => { - let _ig = self.state.check_and_update_leader(leader_id, term).await; + let _ig = self + .state + .check_and_update_leader(leader_id.map(Into::into), term) + .await; } CurpError::WrongClusterVersion(()) => { warn!( diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 06807203b..c61125d11 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -82,7 +82,7 @@ async fn test_unary_fetch_clusters_serializable() { let connects = init_mocked_connects(3, |_id, conn| { conn.expect_fetch_cluster().return_once(|_req, _timeout| { Ok(tonic::Response::new(FetchClusterResponse { - leader_id: Some(0), + leader_id: Some(0.into()), term: 1, cluster_id: 123, members: vec![ @@ -119,7 +119,7 @@ async fn test_unary_fetch_clusters_serializable_local_first() { panic!("other server's `fetch_cluster` should not be invoked"); }; Ok(tonic::Response::new(FetchClusterResponse { - leader_id: Some(0), + leader_id: Some(0.into()), term: 1, cluster_id: 123, members, @@ -140,7 +140,7 @@ async fn test_unary_fetch_clusters_linearizable() { .return_once(move |_req, _timeout| { let resp = match id { 0 => FetchClusterResponse { - leader_id: Some(0), + leader_id: Some(0.into()), term: 2, cluster_id: 123, members: vec![ @@ -153,7 +153,7 @@ async fn test_unary_fetch_clusters_linearizable() { cluster_version: 1, }, 1 | 4 => FetchClusterResponse { - leader_id: Some(0), + leader_id: Some(0.into()), term: 2, cluster_id: 123, members: vec![], // linearizable read from follower returns empty members @@ -167,8 +167,8 @@ async fn 
test_unary_fetch_clusters_linearizable() { cluster_version: 1, }, 3 => FetchClusterResponse { - leader_id: Some(3), // imagine this node is a old leader - term: 1, // with the old term + leader_id: Some(3.into()), // imagine this node is a old leader + term: 1, // with the old term cluster_id: 123, members: vec![ Member::new(0, "S0", vec!["B0".to_owned()], [], false), @@ -206,7 +206,7 @@ async fn test_unary_fetch_clusters_linearizable_failed() { .return_once(move |_req, _timeout| { let resp = match id { 0 => FetchClusterResponse { - leader_id: Some(0), + leader_id: Some(0.into()), term: 2, cluster_id: 123, members: vec![ @@ -219,7 +219,7 @@ async fn test_unary_fetch_clusters_linearizable_failed() { cluster_version: 1, }, 1 => FetchClusterResponse { - leader_id: Some(0), + leader_id: Some(0.into()), term: 2, cluster_id: 123, members: vec![], // linearizable read from follower returns empty members @@ -233,8 +233,8 @@ async fn test_unary_fetch_clusters_linearizable_failed() { cluster_version: 1, }, 3 => FetchClusterResponse { - leader_id: Some(3), // imagine this node is a old leader - term: 1, // with the old term + leader_id: Some(3.into()), // imagine this node is a old leader + term: 1, // with the old term cluster_id: 123, members: vec![ Member::new(0, "S0", vec!["B0".to_owned()], [], false), @@ -246,8 +246,8 @@ async fn test_unary_fetch_clusters_linearizable_failed() { cluster_version: 1, }, 4 => FetchClusterResponse { - leader_id: Some(3), // imagine this node is a old follower of old leader(3) - term: 1, // with the old term + leader_id: Some(3.into()), // imagine this node is a old follower of old leader(3) + term: 1, // with the old term cluster_id: 123, members: vec![], cluster_version: 1, @@ -420,7 +420,7 @@ async fn test_unary_slow_round_fetch_leader_first() { .return_once(move |_req, _timeout| { flag_c.store(true, std::sync::atomic::Ordering::Relaxed); Ok(tonic::Response::new(FetchClusterResponse { - leader_id: Some(0), + leader_id: Some(0.into()), 
term: 1, cluster_id: 123, members: vec![ @@ -684,7 +684,7 @@ async fn test_retry_propose_return_retry_error() { conn.expect_fetch_cluster() .returning(move |_req, _timeout| { Ok(tonic::Response::new(FetchClusterResponse { - leader_id: Some(0), + leader_id: Some(0.into()), term: 2, cluster_id: 123, members: vec![ diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 2cd69a50a..657789407 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -34,6 +34,7 @@ pub use self::proto::{ Member, MoveLeaderRequest, MoveLeaderResponse, + OptionalU64, ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeId as PbProposeId, @@ -100,6 +101,27 @@ impl From for PbProposeId { } } +impl From for OptionalU64 { + #[inline] + fn from(value: u64) -> Self { + Self { value } + } +} + +impl From for u64 { + #[inline] + fn from(value: OptionalU64) -> Self { + value.value + } +} + +impl From<&OptionalU64> for u64 { + #[inline] + fn from(value: &OptionalU64) -> Self { + value.value + } +} + impl FetchClusterResponse { /// Create a new `FetchClusterResponse` pub(crate) fn new( @@ -110,7 +132,7 @@ impl FetchClusterResponse { cluster_version: u64, ) -> Self { Self { - leader_id, + leader_id: leader_id.map(Into::into), term, cluster_id, members, @@ -674,7 +696,10 @@ impl CurpError { /// `Redirect` error pub(crate) fn redirect(leader_id: Option, term: u64) -> Self { - Self::Redirect(Redirect { leader_id, term }) + Self::Redirect(Redirect { + leader_id: leader_id.map(Into::into), + term, + }) } /// `Internal` error diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index 6ff65df04..4b2400d79 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -434,7 +434,7 @@ impl CurpGroup { leader = leader_id; } } - leader.map(|l| (l, max_term)) + leader.map(|l| (l.value, max_term)) } pub async fn get_leader(&self) -> (ServerId, u64) { @@ -506,7 +506,7 @@ impl 
CurpGroup { .map(|m| Member::new(m.id, m.name, m.peer_urls, m.client_urls, m.is_learner)) .collect(); let cluster_res = curp::rpc::FetchClusterResponse { - leader_id: cluster_res_base.leader_id, + leader_id: cluster_res_base.leader_id.map(|l| l.value.into()), term: cluster_res_base.term, cluster_id: cluster_res_base.cluster_id, members, diff --git a/crates/simulation/src/curp_group.rs b/crates/simulation/src/curp_group.rs index ebca5fa2b..ce970e31f 100644 --- a/crates/simulation/src/curp_group.rs +++ b/crates/simulation/src/curp_group.rs @@ -253,7 +253,7 @@ impl CurpGroup { leader = leader_id; } } - leader.map(|l| (l, max_term)) + leader.map(|l| (l.into(), max_term)) }) .await .unwrap() From 2a4ef602df69024b4a0567b2ffa8a67eb3ed9a44 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Mon, 15 Jul 2024 20:09:44 +0800 Subject: [PATCH 009/322] refactor(client)!: AuthClient::user_grant_role Signed-off-by: lxl66566 --- crates/xline-client/examples/auth_user.rs | 12 ++------ crates/xline-client/src/clients/auth.rs | 20 ++++++++----- crates/xline-client/src/types/auth.rs | 30 ------------------- crates/xline-client/tests/it/auth.rs | 11 ++----- crates/xline-test-utils/src/lib.rs | 7 ++--- .../xlinectl/src/command/user/grant_role.rs | 11 ++++--- 6 files changed, 28 insertions(+), 63 deletions(-) diff --git a/crates/xline-client/examples/auth_user.rs b/crates/xline-client/examples/auth_user.rs index bcfcfc0f6..c813f9ed0 100644 --- a/crates/xline-client/examples/auth_user.rs +++ b/crates/xline-client/examples/auth_user.rs @@ -1,8 +1,6 @@ use anyhow::Result; use xline_client::{ - types::auth::{ - AuthUserChangePasswordRequest, AuthUserGrantRoleRequest, AuthUserRevokeRoleRequest, - }, + types::auth::{AuthUserChangePasswordRequest, AuthUserRevokeRoleRequest}, Client, ClientOptions, }; @@ -25,12 +23,8 @@ async fn main() -> Result<()> { .await?; // grant roles - client - .user_grant_role(AuthUserGrantRoleRequest::new("user1", "role1")) - .await?; - client - 
.user_grant_role(AuthUserGrantRoleRequest::new("user2", "role2")) - .await?; + client.user_grant_role("user1", "role1").await?; + client.user_grant_role("user2", "role2").await?; // list all users and their roles let resp = client.user_list().await?; diff --git a/crates/xline-client/src/clients/auth.rs b/crates/xline-client/src/clients/auth.rs index ce0289428..112fcaefd 100644 --- a/crates/xline-client/src/clients/auth.rs +++ b/crates/xline-client/src/clients/auth.rs @@ -16,7 +16,7 @@ use crate::{ types::auth::{ AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - AuthUserChangePasswordRequest, AuthUserGrantRoleRequest, AuthUserRevokeRoleRequest, + AuthUserChangePasswordRequest, AuthUserRevokeRoleRequest, }, AuthService, CurpClient, }; @@ -427,7 +427,7 @@ impl AuthClient { /// # Examples /// /// ```no_run - /// use xline_client::{types::auth::AuthUserGrantRoleRequest, Client, ClientOptions}; + /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -440,9 +440,7 @@ impl AuthClient { /// /// // add user and role /// - /// client - /// .user_grant_role(AuthUserGrantRoleRequest::new("user", "role")) - /// .await?; + /// client.user_grant_role("user", "role").await?; /// /// Ok(()) /// } @@ -450,9 +448,17 @@ impl AuthClient { #[inline] pub async fn user_grant_role( &self, - request: AuthUserGrantRoleRequest, + name: impl Into, + role: impl Into, ) -> Result { - self.handle_req(request.inner, false).await + self.handle_req( + xlineapi::AuthUserGrantRoleRequest { + user: name.into(), + role: role.into(), + }, + false, + ) + .await } /// Revoke role for an user. 
diff --git a/crates/xline-client/src/types/auth.rs b/crates/xline-client/src/types/auth.rs index 042e60bf2..7f288b731 100644 --- a/crates/xline-client/src/types/auth.rs +++ b/crates/xline-client/src/types/auth.rs @@ -36,36 +36,6 @@ impl From for xlineapi::AuthUserChangePasswordReq } } -/// Request for `AuthUserGrantRole` -#[derive(Debug, PartialEq)] -pub struct AuthUserGrantRoleRequest { - /// Inner request - pub(crate) inner: xlineapi::AuthUserGrantRoleRequest, -} - -impl AuthUserGrantRoleRequest { - /// Creates a new `AuthUserGrantRoleRequest` - /// - /// `user_name` is the name of the user to grant role, - /// `role` is the role name to grant. - #[inline] - pub fn new(user_name: impl Into, role: impl Into) -> Self { - Self { - inner: xlineapi::AuthUserGrantRoleRequest { - user: user_name.into(), - role: role.into(), - }, - } - } -} - -impl From for xlineapi::AuthUserGrantRoleRequest { - #[inline] - fn from(req: AuthUserGrantRoleRequest) -> Self { - req.inner - } -} - /// Request for `AuthUserRevokeRole` #[derive(Debug, PartialEq)] pub struct AuthUserRevokeRoleRequest { diff --git a/crates/xline-client/tests/it/auth.rs b/crates/xline-client/tests/it/auth.rs index 0902deb3d..44db052dd 100644 --- a/crates/xline-client/tests/it/auth.rs +++ b/crates/xline-client/tests/it/auth.rs @@ -4,8 +4,7 @@ use xline_client::{ types::auth::{ AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - AuthUserChangePasswordRequest, AuthUserGrantRoleRequest, AuthUserRevokeRoleRequest, - Permission, PermissionType, + AuthUserChangePasswordRequest, AuthUserRevokeRoleRequest, Permission, PermissionType, }, }; @@ -157,12 +156,8 @@ async fn user_role_operations_should_success_in_normal_path() -> Result<()> { client.role_add(AuthRoleAddRequest::new(role1)).await?; client.role_add(AuthRoleAddRequest::new(role2)).await?; - client - .user_grant_role(AuthUserGrantRoleRequest::new(name1, role1)) - .await?; - client - 
.user_grant_role(AuthUserGrantRoleRequest::new(name1, role2)) - .await?; + client.user_grant_role(name1, role1).await?; + client.user_grant_role(name1, role2).await?; let user_get_resp = client.user_get(name1).await?; assert_eq!( diff --git a/crates/xline-test-utils/src/lib.rs b/crates/xline-test-utils/src/lib.rs index ffd5258b1..cfa549351 100644 --- a/crates/xline-test-utils/src/lib.rs +++ b/crates/xline-test-utils/src/lib.rs @@ -15,8 +15,7 @@ use utils::config::{ }; use xline::server::XlineServer; use xline_client::types::auth::{ - AuthRoleAddRequest, AuthRoleGrantPermissionRequest, AuthUserGrantRoleRequest, Permission, - PermissionType, + AuthRoleAddRequest, AuthRoleGrantPermissionRequest, Permission, PermissionType, }; pub use xline_client::{clients, types, Client, ClientOptions}; @@ -350,9 +349,7 @@ pub async fn set_user( let client = client.auth_client(); client.user_add(name, password, false).await?; client.role_add(AuthRoleAddRequest::new(role)).await?; - client - .user_grant_role(AuthUserGrantRoleRequest::new(name, role)) - .await?; + client.user_grant_role(name, role).await?; if !key.is_empty() { client .role_grant_permission(AuthRoleGrantPermissionRequest::new( diff --git a/crates/xlinectl/src/command/user/grant_role.rs b/crates/xlinectl/src/command/user/grant_role.rs index 23b76408e..3646ec9fa 100644 --- a/crates/xlinectl/src/command/user/grant_role.rs +++ b/crates/xlinectl/src/command/user/grant_role.rs @@ -1,8 +1,11 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{error::Result, types::auth::AuthUserGrantRoleRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; +/// Temporary struct for testing, indicates `(user_name, role)` +type AuthUserGrantRoleRequest = (String, String); + /// Definition of `grant_role` command pub(super) fn command() -> Command { Command::new("grant_role") @@ -15,13 +18,13 @@ pub(super) fn command() -> Command { pub(super) fn build_request(matches: &ArgMatches) -> 
AuthUserGrantRoleRequest { let name = matches.get_one::("name").expect("required"); let role = matches.get_one::("role").expect("required"); - AuthUserGrantRoleRequest::new(name, role) + (name.into(), role.into()) } /// Execute the command pub(super) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { let req = build_request(matches); - let resp = client.auth_client().user_grant_role(req).await?; + let resp = client.auth_client().user_grant_role(req.0, req.1).await?; resp.print(); Ok(()) @@ -38,7 +41,7 @@ mod tests { fn command_parse_should_be_valid() { let test_cases = vec![TestCase::new( vec!["grant_role", "JohnDoe", "Admin"], - Some(AuthUserGrantRoleRequest::new("JohnDoe", "Admin")), + Some(("JohnDoe".into(), "Admin".into())), )]; for case in test_cases { From 87f7fe7ef743c1b0dcc2d2c6a1f07a3a6abe3458 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Mon, 15 Jul 2024 21:22:17 +0800 Subject: [PATCH 010/322] refactor(client)!: AuthClient::user_revoke_role Signed-off-by: lxl66566 --- crates/xline-client/examples/auth_user.rs | 13 ++------ crates/xline-client/src/clients/auth.rs | 20 ++++++++----- crates/xline-client/src/types/auth.rs | 30 ------------------- crates/xline-client/tests/it/auth.rs | 10 ++----- .../xlinectl/src/command/user/revoke_role.rs | 11 ++++--- 5 files changed, 26 insertions(+), 58 deletions(-) diff --git a/crates/xline-client/examples/auth_user.rs b/crates/xline-client/examples/auth_user.rs index c813f9ed0..6dc52fca0 100644 --- a/crates/xline-client/examples/auth_user.rs +++ b/crates/xline-client/examples/auth_user.rs @@ -1,8 +1,5 @@ use anyhow::Result; -use xline_client::{ - types::auth::{AuthUserChangePasswordRequest, AuthUserRevokeRoleRequest}, - Client, ClientOptions, -}; +use xline_client::{types::auth::AuthUserChangePasswordRequest, Client, ClientOptions}; #[tokio::main] async fn main() -> Result<()> { @@ -39,12 +36,8 @@ async fn main() -> Result<()> { } // revoke role from user - client - 
.user_revoke_role(AuthUserRevokeRoleRequest::new("user1", "role1")) - .await?; - client - .user_revoke_role(AuthUserRevokeRoleRequest::new("user2", "role2")) - .await?; + client.user_revoke_role("user1", "role1").await?; + client.user_revoke_role("user2", "role2").await?; // delete users client.user_delete("user1").await?; diff --git a/crates/xline-client/src/clients/auth.rs b/crates/xline-client/src/clients/auth.rs index 112fcaefd..6b7b6f95a 100644 --- a/crates/xline-client/src/clients/auth.rs +++ b/crates/xline-client/src/clients/auth.rs @@ -16,7 +16,7 @@ use crate::{ types::auth::{ AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - AuthUserChangePasswordRequest, AuthUserRevokeRoleRequest, + AuthUserChangePasswordRequest, }, AuthService, CurpClient, }; @@ -470,7 +470,7 @@ impl AuthClient { /// # Examples /// /// ```no_run - /// use xline_client::{types::auth::AuthUserRevokeRoleRequest, Client, ClientOptions}; + /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -483,9 +483,7 @@ impl AuthClient { /// /// // grant role /// - /// client - /// .user_revoke_role(AuthUserRevokeRoleRequest::new("user", "role")) - /// .await?; + /// client.user_revoke_role("user", "role").await?; /// /// Ok(()) /// } @@ -493,9 +491,17 @@ impl AuthClient { #[inline] pub async fn user_revoke_role( &self, - request: AuthUserRevokeRoleRequest, + name: impl Into, + role: impl Into, ) -> Result { - self.handle_req(request.inner, false).await + self.handle_req( + xlineapi::AuthUserRevokeRoleRequest { + name: name.into(), + role: role.into(), + }, + false, + ) + .await } /// Adds role. 
diff --git a/crates/xline-client/src/types/auth.rs b/crates/xline-client/src/types/auth.rs index 7f288b731..1d1123aa5 100644 --- a/crates/xline-client/src/types/auth.rs +++ b/crates/xline-client/src/types/auth.rs @@ -36,36 +36,6 @@ impl From for xlineapi::AuthUserChangePasswordReq } } -/// Request for `AuthUserRevokeRole` -#[derive(Debug, PartialEq)] -pub struct AuthUserRevokeRoleRequest { - /// Inner request - pub(crate) inner: xlineapi::AuthUserRevokeRoleRequest, -} - -impl AuthUserRevokeRoleRequest { - /// Creates a new `AuthUserRevokeRoleRequest` - /// - /// `user_name` is the name of the user to revoke role, - /// `role` is the role name to revoke. - #[inline] - pub fn new(user_name: impl Into, role: impl Into) -> Self { - Self { - inner: xlineapi::AuthUserRevokeRoleRequest { - name: user_name.into(), - role: role.into(), - }, - } - } -} - -impl From for xlineapi::AuthUserRevokeRoleRequest { - #[inline] - fn from(req: AuthUserRevokeRoleRequest) -> Self { - req.inner - } -} - /// Request for `AuthRoleAdd` #[derive(Debug, PartialEq)] pub struct AuthRoleAddRequest { diff --git a/crates/xline-client/tests/it/auth.rs b/crates/xline-client/tests/it/auth.rs index 44db052dd..96e5666e0 100644 --- a/crates/xline-client/tests/it/auth.rs +++ b/crates/xline-client/tests/it/auth.rs @@ -4,7 +4,7 @@ use xline_client::{ types::auth::{ AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - AuthUserChangePasswordRequest, AuthUserRevokeRoleRequest, Permission, PermissionType, + AuthUserChangePasswordRequest, Permission, PermissionType, }, }; @@ -165,12 +165,8 @@ async fn user_role_operations_should_success_in_normal_path() -> Result<()> { vec![role1.to_owned(), role2.to_owned()] ); - client - .user_revoke_role(AuthUserRevokeRoleRequest::new(name1, role1)) - .await?; - client - .user_revoke_role(AuthUserRevokeRoleRequest::new(name1, role2)) - .await?; + client.user_revoke_role(name1, role1).await?; + 
client.user_revoke_role(name1, role2).await?; Ok(()) } diff --git a/crates/xlinectl/src/command/user/revoke_role.rs b/crates/xlinectl/src/command/user/revoke_role.rs index 0b34c1dbb..f35f38a10 100644 --- a/crates/xlinectl/src/command/user/revoke_role.rs +++ b/crates/xlinectl/src/command/user/revoke_role.rs @@ -1,8 +1,11 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{error::Result, types::auth::AuthUserRevokeRoleRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; +/// Temporary struct for testing, indicates `(user_name, role)` +type AuthUserRevokeRoleRequest = (String, String); + /// Definition of `revoke_role` command pub(super) fn command() -> Command { Command::new("revoke_role") @@ -15,13 +18,13 @@ pub(super) fn command() -> Command { pub(super) fn build_request(matches: &ArgMatches) -> AuthUserRevokeRoleRequest { let name = matches.get_one::("name").expect("required"); let role = matches.get_one::("role").expect("required"); - AuthUserRevokeRoleRequest::new(name, role) + (name.to_owned(), role.to_owned()) } /// Execute the command pub(super) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { let req = build_request(matches); - let resp = client.auth_client().user_revoke_role(req).await?; + let resp = client.auth_client().user_revoke_role(req.0, req.1).await?; resp.print(); Ok(()) @@ -38,7 +41,7 @@ mod tests { fn command_parse_should_be_valid() { let test_cases = vec![TestCase::new( vec!["revoke_role", "JohnDoe", "Admin"], - Some(AuthUserRevokeRoleRequest::new("JohnDoe", "Admin")), + Some(("JohnDoe".to_owned(), "Admin".to_owned())), )]; for case in test_cases { From eb7da4442784a69772b029a65277b56a6a505c37 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Sun, 14 Jul 2024 18:43:35 +0800 Subject: [PATCH 011/322] refactor(client)!: refactor AuthClient::user_change_password Signed-off-by: lxl66566 --- crates/xline-client/examples/auth_user.rs | 6 ++--- 
crates/xline-client/src/clients/auth.rs | 27 ++++++++++++--------- crates/xline-client/src/types/auth.rs | 28 ---------------------- crates/xline-client/tests/it/auth.rs | 8 +++---- crates/xlinectl/src/command/user/passwd.rs | 17 +++++++------ 5 files changed, 31 insertions(+), 55 deletions(-) diff --git a/crates/xline-client/examples/auth_user.rs b/crates/xline-client/examples/auth_user.rs index 6dc52fca0..dc881f9ed 100644 --- a/crates/xline-client/examples/auth_user.rs +++ b/crates/xline-client/examples/auth_user.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use xline_client::{types::auth::AuthUserChangePasswordRequest, Client, ClientOptions}; +use xline_client::{Client, ClientOptions}; #[tokio::main] async fn main() -> Result<()> { @@ -15,9 +15,7 @@ async fn main() -> Result<()> { client.user_add("user2", "", true).await?; // change user1's password to "123" - client - .user_change_password(AuthUserChangePasswordRequest::new("user1", "123")) - .await?; + client.user_change_password("user1", "123").await?; // grant roles client.user_grant_role("user1", "role1").await?; diff --git a/crates/xline-client/src/clients/auth.rs b/crates/xline-client/src/clients/auth.rs index 6b7b6f95a..b19a6f743 100644 --- a/crates/xline-client/src/clients/auth.rs +++ b/crates/xline-client/src/clients/auth.rs @@ -16,7 +16,6 @@ use crate::{ types::auth::{ AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - AuthUserChangePasswordRequest, }, AuthService, CurpClient, }; @@ -378,9 +377,7 @@ impl AuthClient { /// # Examples /// /// ```no_run - /// use xline_client::{ - /// types::auth::AuthUserChangePasswordRequest, Client, ClientOptions, - /// }; + /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -394,7 +391,7 @@ impl AuthClient { /// // add the user /// /// client - /// .user_change_password(AuthUserChangePasswordRequest::new("user", "123")) + /// 
.user_change_password("user", "123") /// .await?; /// /// Ok(()) @@ -403,19 +400,27 @@ impl AuthClient { #[inline] pub async fn user_change_password( &self, - mut request: AuthUserChangePasswordRequest, + name: impl Into, + password: impl AsRef, ) -> Result { - if request.inner.password.is_empty() { + let password: &str = password.as_ref(); + if password.is_empty() { return Err(XlineClientError::InvalidArgs(String::from( "role name is empty", ))); } - let hashed_password = hash_password(request.inner.password.as_bytes()).map_err(|err| { + let hashed_password = hash_password(password.as_bytes()).map_err(|err| { XlineClientError::InternalError(format!("Failed to hash password: {err}")) })?; - request.inner.hashed_password = hashed_password; - request.inner.password = String::new(); - self.handle_req(request.inner, false).await + self.handle_req( + xlineapi::AuthUserChangePasswordRequest { + name: name.into(), + hashed_password, + password: String::new(), + }, + false, + ) + .await } /// Grant role for an user. diff --git a/crates/xline-client/src/types/auth.rs b/crates/xline-client/src/types/auth.rs index 1d1123aa5..60de798f5 100644 --- a/crates/xline-client/src/types/auth.rs +++ b/crates/xline-client/src/types/auth.rs @@ -8,34 +8,6 @@ pub use xlineapi::{ AuthenticateResponse, Type as PermissionType, }; -/// Request for `AuthUserChangePassword` -#[derive(Debug, PartialEq)] -pub struct AuthUserChangePasswordRequest { - /// Inner request - pub(crate) inner: xlineapi::AuthUserChangePasswordRequest, -} - -impl AuthUserChangePasswordRequest { - /// Creates a new `AuthUserChangePasswordRequest`. 
- #[inline] - pub fn new(user_name: impl Into, new_password: impl Into) -> Self { - Self { - inner: xlineapi::AuthUserChangePasswordRequest { - name: user_name.into(), - password: new_password.into(), - hashed_password: String::new(), - }, - } - } -} - -impl From for xlineapi::AuthUserChangePasswordRequest { - #[inline] - fn from(req: AuthUserChangePasswordRequest) -> Self { - req.inner - } -} - /// Request for `AuthRoleAdd` #[derive(Debug, PartialEq)] pub struct AuthRoleAddRequest { diff --git a/crates/xline-client/tests/it/auth.rs b/crates/xline-client/tests/it/auth.rs index 96e5666e0..e9fd398bc 100644 --- a/crates/xline-client/tests/it/auth.rs +++ b/crates/xline-client/tests/it/auth.rs @@ -3,8 +3,8 @@ use xline_client::{ error::Result, types::auth::{ AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, - AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - AuthUserChangePasswordRequest, Permission, PermissionType, + AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, Permission, + PermissionType, }, }; @@ -133,9 +133,7 @@ async fn user_operations_should_success_in_normal_path() -> Result<()> { let user_list_resp = client.user_list().await?; assert!(user_list_resp.users.contains(&name1.to_string())); - client - .user_change_password(AuthUserChangePasswordRequest::new(name1, password2)) - .await?; + client.user_change_password(name1, password2).await?; client.user_delete(name1).await?; client.user_get(name1).await.unwrap_err(); diff --git a/crates/xlinectl/src/command/user/passwd.rs b/crates/xlinectl/src/command/user/passwd.rs index 4dbd45f77..976766d42 100644 --- a/crates/xlinectl/src/command/user/passwd.rs +++ b/crates/xlinectl/src/command/user/passwd.rs @@ -1,8 +1,11 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{error::Result, types::auth::AuthUserChangePasswordRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; +/// Temporary request for changing password. 
0 is name, 1 is password +type AuthUserChangePasswordRequest = (String, String); + /// Definition of `passwd` command // TODO: interactive mode pub(super) fn command() -> Command { @@ -16,13 +19,16 @@ pub(super) fn command() -> Command { pub(super) fn build_request(matches: &ArgMatches) -> AuthUserChangePasswordRequest { let name = matches.get_one::("name").expect("required"); let password = matches.get_one::("password").expect("required"); - AuthUserChangePasswordRequest::new(name, password) + (name.to_owned(), password.to_owned()) } /// Execute the command pub(super) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { let req = build_request(matches); - let resp = client.auth_client().user_change_password(req).await?; + let resp = client + .auth_client() + .user_change_password(req.0, req.1) + .await?; resp.print(); Ok(()) @@ -39,10 +45,7 @@ mod tests { fn command_parse_should_be_valid() { let test_cases = vec![TestCase::new( vec!["passwd", "JohnDoe", "new_password"], - Some(AuthUserChangePasswordRequest::new( - "JohnDoe", - "new_password", - )), + Some(("JohnDoe".into(), "new_password".into())), )]; for case in test_cases { From 8a56100969b96d901817cbbe8a1938b6464459ba Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Tue, 16 Jul 2024 10:06:02 +0800 Subject: [PATCH 012/322] refactor(client)!: AuthClient::role_get Signed-off-by: lxl66566 --- crates/xline-client/examples/auth_role.rs | 7 +++--- crates/xline-client/src/clients/auth.rs | 12 +++++------ crates/xline-client/src/types/auth.rs | 26 ----------------------- crates/xline-client/tests/it/auth.rs | 23 +++++++------------- crates/xlinectl/src/command/role/get.rs | 13 +++++------- crates/xlinectl/src/command/user/get.rs | 7 ++---- 6 files changed, 24 insertions(+), 64 deletions(-) diff --git a/crates/xline-client/examples/auth_role.rs b/crates/xline-client/examples/auth_role.rs index 2319dd8ff..39199f1f9 100644 --- a/crates/xline-client/examples/auth_role.rs +++ 
b/crates/xline-client/examples/auth_role.rs @@ -1,9 +1,8 @@ use anyhow::Result; use xline_client::{ types::auth::{ - AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, - AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, Permission, - PermissionType, + AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGrantPermissionRequest, + AuthRoleRevokePermissionRequest, Permission, PermissionType, }, Client, ClientOptions, }; @@ -40,7 +39,7 @@ async fn main() -> Result<()> { println!("roles:"); for role in resp.roles { println!("{}", role); - let get_resp = client.role_get(AuthRoleGetRequest::new(role)).await?; + let get_resp = client.role_get(role).await?; println!("permmisions:"); for perm in get_resp.perm { println!("{} {}", perm.perm_type, String::from_utf8_lossy(&perm.key)); diff --git a/crates/xline-client/src/clients/auth.rs b/crates/xline-client/src/clients/auth.rs index b19a6f743..7134cb763 100644 --- a/crates/xline-client/src/clients/auth.rs +++ b/crates/xline-client/src/clients/auth.rs @@ -14,8 +14,8 @@ use xlineapi::{ use crate::{ error::{Result, XlineClientError}, types::auth::{ - AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, - AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, + AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGrantPermissionRequest, + AuthRoleRevokePermissionRequest, }, AuthService, CurpClient, }; @@ -554,7 +554,6 @@ impl AuthClient { /// # Examples /// /// ```no_run - /// use xline_client::types::auth::AuthRoleGetRequest; /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// @@ -566,7 +565,7 @@ impl AuthClient { /// .await? 
/// .auth_client(); /// - /// let resp = client.role_get(AuthRoleGetRequest::new("role")).await?; + /// let resp = client.role_get("role").await?; /// /// println!("permissions:"); /// for perm in resp.perm { @@ -577,8 +576,9 @@ impl AuthClient { /// } ///``` #[inline] - pub async fn role_get(&self, request: AuthRoleGetRequest) -> Result { - self.handle_req(request.inner, true).await + pub async fn role_get(&self, name: impl Into) -> Result { + self.handle_req(xlineapi::AuthRoleGetRequest { role: name.into() }, true) + .await } /// Lists role. diff --git a/crates/xline-client/src/types/auth.rs b/crates/xline-client/src/types/auth.rs index 60de798f5..0cd894d41 100644 --- a/crates/xline-client/src/types/auth.rs +++ b/crates/xline-client/src/types/auth.rs @@ -34,32 +34,6 @@ impl From for xlineapi::AuthRoleAddRequest { } } -/// Request for `AuthRoleGet` -#[derive(Debug, PartialEq)] -pub struct AuthRoleGetRequest { - /// Inner request - pub(crate) inner: xlineapi::AuthRoleGetRequest, -} - -impl AuthRoleGetRequest { - /// Creates a new `AuthRoleGetRequest` - /// - /// `role` is the name of the role to get. 
- #[inline] - pub fn new(role: impl Into) -> Self { - Self { - inner: xlineapi::AuthRoleGetRequest { role: role.into() }, - } - } -} - -impl From for xlineapi::AuthRoleGetRequest { - #[inline] - fn from(req: AuthRoleGetRequest) -> Self { - req.inner - } -} - /// Request for `AuthRoleDelete` #[derive(Debug, PartialEq)] pub struct AuthRoleDeleteRequest { diff --git a/crates/xline-client/tests/it/auth.rs b/crates/xline-client/tests/it/auth.rs index e9fd398bc..5d8d86d29 100644 --- a/crates/xline-client/tests/it/auth.rs +++ b/crates/xline-client/tests/it/auth.rs @@ -2,9 +2,8 @@ use xline_client::{ error::Result, types::auth::{ - AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGetRequest, - AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, Permission, - PermissionType, + AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGrantPermissionRequest, + AuthRoleRevokePermissionRequest, Permission, PermissionType, }, }; @@ -20,8 +19,8 @@ async fn role_operations_should_success_in_normal_path() -> Result<()> { client.role_add(AuthRoleAddRequest::new(role1)).await?; client.role_add(AuthRoleAddRequest::new(role2)).await?; - client.role_get(AuthRoleGetRequest::new(role1)).await?; - client.role_get(AuthRoleGetRequest::new(role2)).await?; + client.role_get(role1).await?; + client.role_get(role2).await?; let role_list_resp = client.role_list().await?; assert_eq!( @@ -36,14 +35,8 @@ async fn role_operations_should_success_in_normal_path() -> Result<()> { .role_delete(AuthRoleDeleteRequest::new(role2)) .await?; - client - .role_get(AuthRoleGetRequest::new(role1)) - .await - .unwrap_err(); - client - .role_get(AuthRoleGetRequest::new(role2)) - .await - .unwrap_err(); + client.role_get(role1).await.unwrap_err(); + client.role_get(role2).await.unwrap_err(); Ok(()) } @@ -79,7 +72,7 @@ async fn permission_operations_should_success_in_normal_path() -> Result<()> { .await?; { - let resp = client.role_get(AuthRoleGetRequest::new(role1)).await?; + let resp = 
client.role_get(role1).await?; let permissions = resp.perm; assert!(permissions.contains(&perm1.into())); assert!(permissions.contains(&perm2.into())); @@ -109,7 +102,7 @@ async fn permission_operations_should_success_in_normal_path() -> Result<()> { ) .await?; - let role_get_resp = client.role_get(AuthRoleGetRequest::new(role1)).await?; + let role_get_resp = client.role_get(role1).await?; assert!(role_get_resp.perm.is_empty()); client diff --git a/crates/xlinectl/src/command/role/get.rs b/crates/xlinectl/src/command/role/get.rs index 46c786fab..3fe7236e6 100644 --- a/crates/xlinectl/src/command/role/get.rs +++ b/crates/xlinectl/src/command/role/get.rs @@ -1,5 +1,5 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{error::Result, types::auth::AuthRoleGetRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; @@ -11,9 +11,9 @@ pub(super) fn command() -> Command { } /// Build request from matches -pub(super) fn build_request(matches: &ArgMatches) -> AuthRoleGetRequest { +pub(super) fn build_request(matches: &ArgMatches) -> String { let name = matches.get_one::("name").expect("required"); - AuthRoleGetRequest::new(name) + name.to_owned() } /// Execute the command @@ -30,14 +30,11 @@ mod tests { use super::*; use crate::test_case_struct; - test_case_struct!(AuthRoleGetRequest); + test_case_struct!(String); #[test] fn command_parse_should_be_valid() { - let test_cases = vec![TestCase::new( - vec!["get", "Admin"], - Some(AuthRoleGetRequest::new("Admin")), - )]; + let test_cases = vec![TestCase::new(vec!["get", "Admin"], Some("Admin".into()))]; for case in test_cases { case.run_test(); diff --git a/crates/xlinectl/src/command/user/get.rs b/crates/xlinectl/src/command/user/get.rs index cff34524f..c7f12f7d8 100644 --- a/crates/xlinectl/src/command/user/get.rs +++ b/crates/xlinectl/src/command/user/get.rs @@ -1,5 +1,5 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{error::Result, 
types::auth::AuthRoleGetRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; @@ -28,10 +28,7 @@ pub(super) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result if detail { for role in resp.roles { println!("{role}"); - let resp_role_get = client - .auth_client() - .role_get(AuthRoleGetRequest::new(&role)) - .await?; + let resp_role_get = client.auth_client().role_get(role).await?; resp_role_get.print(); } } else { From 4aa38907d36ea6f9543dec982e0bb1f0eb4d9178 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Fri, 19 Jul 2024 18:49:05 +0800 Subject: [PATCH 013/322] refactor(client): WatchClient::watch Signed-off-by: lxl66566 refactor(client): move WatchKeyOptions to range_end.rs, for reusing it to other Options Signed-off-by: lxl66566 chore: move range_end to RangeEndOptions, add tests Signed-off-by: lxl66566 fix: some rename and doc fix Signed-off-by: lxl66566 --- crates/simulation/src/xline_group.rs | 24 +++++-- crates/simulation/tests/it/xline.rs | 4 +- crates/xline-client/examples/watch.rs | 4 +- crates/xline-client/src/clients/lock.rs | 8 +-- crates/xline-client/src/clients/watch.rs | 19 +++--- crates/xline-client/src/types/mod.rs | 2 + crates/xline-client/src/types/range_end.rs | 63 ++++++++++++++++++ crates/xline-client/src/types/watch.rs | 68 +++++++++++--------- crates/xline-client/tests/it/watch.rs | 9 +-- crates/xline/src/server/lock_server.rs | 4 +- crates/xline/tests/it/watch_test.rs | 7 +- crates/xlineapi/src/command.rs | 3 +- crates/xlinectl/src/command/watch.rs | 74 ++++++++++++---------- 13 files changed, 191 insertions(+), 98 deletions(-) create mode 100644 crates/xline-client/src/types/range_end.rs diff --git a/crates/simulation/src/xline_group.rs b/crates/simulation/src/xline_group.rs index eb97322d2..bfa908e24 100644 --- a/crates/simulation/src/xline_group.rs +++ b/crates/simulation/src/xline_group.rs @@ -17,7 +17,7 @@ use xline_client::{ CompactionRequest, CompactionResponse, PutOptions, 
PutResponse, RangeRequest, RangeResponse, }, - watch::{WatchRequest, WatchStreaming, Watcher}, + watch::{WatchOptions, WatchStreaming, Watcher}, }, Client, ClientOptions, }; @@ -191,7 +191,18 @@ impl SimClient { } impl_client_method!(range, kv_client, RangeRequest, RangeResponse); impl_client_method!(compact, kv_client, CompactionRequest, CompactionResponse); - impl_client_method!(watch, watch_client, WatchRequest, (Watcher, WatchStreaming)); + pub async fn watch( + &self, + key: impl Into>, + options: Option, + ) -> Result<(Watcher, WatchStreaming), XlineClientError> { + let client = self.inner.clone(); + let key = key.into(); + self.handle + .spawn(async move { client.watch_client().watch(key, options).await }) + .await + .unwrap() + } } impl Drop for XlineGroup { @@ -279,17 +290,20 @@ impl SimEtcdClient { pub async fn watch( &self, - request: WatchRequest, + key: impl Into>, + options: Option, ) -> Result<(Watcher, WatchStreaming), XlineClientError> { let mut client = self.watch.clone(); - + let key = key.into(); self.handle .spawn(async move { let (mut request_sender, request_receiver) = futures::channel::mpsc::channel::(128); let request = xlineapi::WatchRequest { - request_union: Some(RequestUnion::CreateRequest(request.into())), + request_union: Some(RequestUnion::CreateRequest( + options.unwrap_or_default().with_key(key).into(), + )), }; request_sender diff --git a/crates/simulation/tests/it/xline.rs b/crates/simulation/tests/it/xline.rs index 011efa32e..4d0423603 100644 --- a/crates/simulation/tests/it/xline.rs +++ b/crates/simulation/tests/it/xline.rs @@ -6,7 +6,7 @@ use simulation::xline_group::{SimEtcdClient, XlineGroup}; use xline_client::types::{ cluster::{MemberAddRequest, MemberListRequest}, kv::CompactionRequest, - watch::WatchRequest, + watch::WatchOptions, }; // TODO: Add more tests if needed @@ -39,7 +39,7 @@ async fn watch_compacted_revision_should_receive_canceled_response() { assert!(result.is_ok()); let (_, mut watch_stream) = client - 
.watch(WatchRequest::new("key").with_start_revision(4)) + .watch("key", Some(WatchOptions::default().with_start_revision(4))) .await .unwrap(); let r = watch_stream.message().await.unwrap().unwrap(); diff --git a/crates/xline-client/examples/watch.rs b/crates/xline-client/examples/watch.rs index e43d4d72e..00792f192 100644 --- a/crates/xline-client/examples/watch.rs +++ b/crates/xline-client/examples/watch.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use xline_client::{types::watch::WatchRequest, Client, ClientOptions}; +use xline_client::{Client, ClientOptions}; #[tokio::main] async fn main() -> Result<()> { @@ -11,7 +11,7 @@ async fn main() -> Result<()> { let kv_client = client.kv_client(); // watch - let (mut watcher, mut stream) = watch_client.watch(WatchRequest::new("key1")).await?; + let (mut watcher, mut stream) = watch_client.watch("key1", None).await?; kv_client.put("key1", "value1", None).await?; let resp = stream.message().await?.unwrap(); diff --git a/crates/xline-client/src/clients/lock.rs b/crates/xline-client/src/clients/lock.rs index 322c4ab09..58af9764a 100644 --- a/crates/xline-client/src/clients/lock.rs +++ b/crates/xline-client/src/clients/lock.rs @@ -16,7 +16,7 @@ use crate::{ clients::{lease::LeaseClient, watch::WatchClient, DEFAULT_SESSION_TTL}, error::{Result, XlineClientError}, lease_gen::LeaseIdGenerator, - types::{kv::TxnRequest as KvTxnRequest, watch::WatchRequest}, + types::kv::TxnRequest as KvTxnRequest, CurpClient, }; @@ -192,7 +192,7 @@ impl Xutex { ..Default::default() })), }; - let range_end = KeyRange::get_prefix(prefix.as_bytes()); + let range_end = KeyRange::get_prefix(prefix); #[allow(clippy::as_conversions)] // this cast is always safe let get_owner = RequestOp { request: Some(Request::RequestRange(RangeRequest { @@ -406,7 +406,7 @@ impl LockClient { let rev = my_rev.overflow_sub(1); let mut watch_client = self.watch_client.clone(); loop { - let range_end = KeyRange::get_prefix(pfx.as_bytes()); + let range_end = 
KeyRange::get_prefix(&pfx); #[allow(clippy::as_conversions)] // this cast is always safe let get_req = RangeRequest { key: pfx.as_bytes().to_vec(), @@ -424,7 +424,7 @@ impl LockClient { Some(kv) => kv.key.clone(), None => return Ok(()), }; - let (_, mut response_stream) = watch_client.watch(WatchRequest::new(last_key)).await?; + let (_, mut response_stream) = watch_client.watch(last_key, None).await?; while let Some(watch_res) = response_stream.message().await? { #[allow(clippy::as_conversions)] // this cast is always safe if watch_res diff --git a/crates/xline-client/src/clients/watch.rs b/crates/xline-client/src/clients/watch.rs index f1d036802..947cb21fc 100644 --- a/crates/xline-client/src/clients/watch.rs +++ b/crates/xline-client/src/clients/watch.rs @@ -6,7 +6,7 @@ use xlineapi::{self, RequestUnion}; use crate::{ error::{Result, XlineClientError}, - types::watch::{WatchRequest, WatchStreaming, Watcher}, + types::watch::{WatchOptions, WatchStreaming, Watcher}, AuthService, }; @@ -53,10 +53,7 @@ impl WatchClient { /// # Examples /// /// ```no_run - /// use xline_client::{ - /// types::watch::WatchRequest, - /// Client, ClientOptions, - /// }; + /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -67,7 +64,7 @@ impl WatchClient { /// let mut watch_client = client.watch_client(); /// let mut kv_client = client.kv_client(); /// - /// let (mut watcher, mut stream) = watch_client.watch(WatchRequest::new("key1")).await?; + /// let (mut watcher, mut stream) = watch_client.watch("key1", None).await?; /// kv_client.put("key1", "value1", None).await?; /// /// let resp = stream.message().await?.unwrap(); @@ -86,12 +83,18 @@ impl WatchClient { /// } /// ``` #[inline] - pub async fn watch(&mut self, request: WatchRequest) -> Result<(Watcher, WatchStreaming)> { + pub async fn watch( + &mut self, + key: impl Into>, + options: Option, + ) -> Result<(Watcher, WatchStreaming)> { let (mut request_sender, request_receiver) = 
channel::(CHANNEL_SIZE); let request = xlineapi::WatchRequest { - request_union: Some(RequestUnion::CreateRequest(request.into())), + request_union: Some(RequestUnion::CreateRequest( + options.unwrap_or_default().with_key(key.into()).into(), + )), }; request_sender diff --git a/crates/xline-client/src/types/mod.rs b/crates/xline-client/src/types/mod.rs index a3abb3b5f..c1bec0e75 100644 --- a/crates/xline-client/src/types/mod.rs +++ b/crates/xline-client/src/types/mod.rs @@ -8,5 +8,7 @@ pub mod kv; pub mod lease; /// Maintenance type definitions. pub mod maintenance; +/// Range Option definitions, to build a `range_end` from key. +pub mod range_end; /// Watch type definitions. pub mod watch; diff --git a/crates/xline-client/src/types/range_end.rs b/crates/xline-client/src/types/range_end.rs new file mode 100644 index 000000000..d4c3d5f70 --- /dev/null +++ b/crates/xline-client/src/types/range_end.rs @@ -0,0 +1,63 @@ +use xlineapi::command::KeyRange; + +/// Range end options, indicates how to set `range_end` from a key. +#[derive(Clone, Debug, PartialEq, Eq, Default)] +#[non_exhaustive] +pub enum RangeOption { + /// Only lookup the given single key. Use empty Vec as `range_end` + #[default] + SingleKey, + /// If set, Xline will lookup all keys match the given prefix + Prefix, + /// If set, Xline will lookup all keys that are equal to or greater than the given key + FromKey, + /// Set `range_end` directly + RangeEnd(Vec), +} + +impl RangeOption { + /// Get the `range_end` for request, and modify key if necessary. 
+ #[inline] + pub fn get_range_end(self, key: &mut Vec) -> Vec { + match self { + RangeOption::SingleKey => vec![], + RangeOption::Prefix => { + if key.is_empty() { + key.push(0); + vec![0] + } else { + KeyRange::get_prefix(key) + } + } + RangeOption::FromKey => { + if key.is_empty() { + key.push(0); + } + vec![0] + } + RangeOption::RangeEnd(range_end) => range_end, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_range_end() { + let mut key = vec![]; + assert!(RangeOption::SingleKey.get_range_end(&mut key).is_empty()); + assert!(key.is_empty()); + assert!(RangeOption::FromKey.get_range_end(&mut key).first() == Some(&0)); + assert!(key.first() == Some(&0)); + assert_eq!( + RangeOption::Prefix.get_range_end(&mut key), + KeyRange::get_prefix(&key) + ); + assert_eq!( + RangeOption::RangeEnd(vec![1, 2, 3]).get_range_end(&mut key), + vec![1, 2, 3] + ); + } +} diff --git a/crates/xline-client/src/types/watch.rs b/crates/xline-client/src/types/watch.rs index 874253d58..7c7be55aa 100644 --- a/crates/xline-client/src/types/watch.rs +++ b/crates/xline-client/src/types/watch.rs @@ -3,11 +3,11 @@ use std::{ ops::{Deref, DerefMut}, }; +use super::range_end::RangeOption; +use crate::error::{Result, XlineClientError}; use futures::channel::mpsc::Sender; -use xlineapi::{command::KeyRange, RequestUnion, WatchCancelRequest, WatchProgressRequest}; pub use xlineapi::{Event, EventType, KeyValue, WatchResponse}; - -use crate::error::{Result, XlineClientError}; +use xlineapi::{RequestUnion, WatchCancelRequest, WatchProgressRequest}; /// The watching handle. 
#[derive(Debug)] @@ -39,7 +39,7 @@ impl Watcher { /// /// If sender fails to send to channel #[inline] - pub fn watch(&mut self, request: WatchRequest) -> Result<()> { + pub fn watch(&mut self, request: WatchOptions) -> Result<()> { let request = xlineapi::WatchRequest { request_union: Some(RequestUnion::CreateRequest(request.into())), }; @@ -102,37 +102,28 @@ impl Watcher { } /// Watch Request -#[derive(Clone, Debug, PartialEq)] -pub struct WatchRequest { +#[derive(Clone, Debug, PartialEq, Default)] +pub struct WatchOptions { /// Inner watch create request inner: xlineapi::WatchCreateRequest, + /// Watch range end options + range_end_options: RangeOption, } -impl WatchRequest { - /// Creates a New `WatchRequest` - /// +impl WatchOptions { /// `key` is the key to register for watching. #[inline] #[must_use] - pub fn new(key: impl Into>) -> Self { - Self { - inner: xlineapi::WatchCreateRequest { - key: key.into(), - ..Default::default() - }, - } + pub fn with_key(mut self, key: impl Into>) -> Self { + self.inner.key = key.into(); + self } /// If set, Xline will watch all keys with the matching prefix #[inline] #[must_use] pub fn with_prefix(mut self) -> Self { - if self.inner.key.is_empty() { - self.inner.key = vec![0]; - self.inner.range_end = vec![0]; - } else { - self.inner.range_end = KeyRange::get_prefix(&self.inner.key); - } + self.range_end_options = RangeOption::Prefix; self } @@ -140,10 +131,7 @@ impl WatchRequest { #[inline] #[must_use] pub fn with_from_key(mut self) -> Self { - if self.inner.key.is_empty() { - self.inner.key = vec![0]; - } - self.inner.range_end = vec![0]; + self.range_end_options = RangeOption::FromKey; self } @@ -155,7 +143,7 @@ impl WatchRequest { #[inline] #[must_use] pub fn with_range_end(mut self, range_end: impl Into>) -> Self { - self.inner.range_end = range_end.into(); + self.range_end_options = RangeOption::RangeEnd(range_end.into()); self } @@ -212,9 +200,12 @@ impl WatchRequest { } } -impl From for xlineapi::WatchCreateRequest 
{ +impl From for xlineapi::WatchCreateRequest { #[inline] - fn from(request: WatchRequest) -> Self { + fn from(mut request: WatchOptions) -> Self { + request.inner.range_end = request + .range_end_options + .get_range_end(&mut request.inner.key); request.inner } } @@ -278,3 +269,22 @@ impl DerefMut for WatchStreaming { &mut self.inner } } + +#[cfg(test)] +mod tests { + use xlineapi::command::KeyRange; + + use super::*; + + #[test] + fn test_watch_request_build_from_watch_options() { + let options = WatchOptions::default().with_prev_kv().with_key("key"); + let request = xlineapi::WatchCreateRequest::from(options.clone()); + assert!(request.prev_kv); + assert!(request.range_end.is_empty()); + + let options2 = options.clone().with_prefix(); + let request = xlineapi::WatchCreateRequest::from(options2.clone()); + assert_eq!(request.range_end, KeyRange::get_prefix("key")); + } +} diff --git a/crates/xline-client/tests/it/watch.rs b/crates/xline-client/tests/it/watch.rs index a8a803677..f6c573088 100644 --- a/crates/xline-client/tests/it/watch.rs +++ b/crates/xline-client/tests/it/watch.rs @@ -1,8 +1,5 @@ //! 
The following tests are originally from `etcd-client` -use xline_client::{ - error::Result, - types::watch::{EventType, WatchRequest}, -}; +use xline_client::{error::Result, types::watch::EventType}; use super::common::get_cluster_client; @@ -12,7 +9,7 @@ async fn watch_should_receive_consistent_events() -> Result<()> { let mut watch_client = client.watch_client(); let kv_client = client.kv_client(); - let (mut watcher, mut stream) = watch_client.watch(WatchRequest::new("watch01")).await?; + let (mut watcher, mut stream) = watch_client.watch("watch01", None).await?; kv_client.put("watch01", "01", None).await?; @@ -41,7 +38,7 @@ async fn watch_stream_should_work_after_watcher_dropped() -> Result<()> { let mut watch_client = client.watch_client(); let kv_client = client.kv_client(); - let (_, mut stream) = watch_client.watch(WatchRequest::new("watch01")).await?; + let (_, mut stream) = watch_client.watch("watch01", None).await?; kv_client.put("watch01", "01", None).await?; diff --git a/crates/xline/src/server/lock_server.rs b/crates/xline/src/server/lock_server.rs index f5649cb8c..578b03e1e 100644 --- a/crates/xline/src/server/lock_server.rs +++ b/crates/xline/src/server/lock_server.rs @@ -107,7 +107,7 @@ impl LockServer { ..Default::default() })), }; - let range_end = KeyRange::get_prefix(prefix.as_bytes()); + let range_end = KeyRange::get_prefix(prefix); #[allow(clippy::as_conversions)] // this cast is always safe let get_owner = RequestOp { request: Some(Request::RequestRange(RangeRequest { @@ -137,7 +137,7 @@ impl LockServer { let mut watch_client = WatchClient::new(Channel::balance_list(self.addrs.clone().into_iter())); loop { - let range_end = KeyRange::get_prefix(pfx.as_bytes()); + let range_end = KeyRange::get_prefix(&pfx); #[allow(clippy::as_conversions)] // this cast is always safe let get_req = RangeRequest { key: pfx.as_bytes().to_vec(), diff --git a/crates/xline/tests/it/watch_test.rs b/crates/xline/tests/it/watch_test.rs index d2de44bf9..4d2e8b80c 
100644 --- a/crates/xline/tests/it/watch_test.rs +++ b/crates/xline/tests/it/watch_test.rs @@ -1,10 +1,7 @@ use std::error::Error; use test_macros::abort_on_panic; -use xline_test_utils::{ - types::{kv::DeleteRangeRequest, watch::WatchRequest}, - Cluster, -}; +use xline_test_utils::{types::kv::DeleteRangeRequest, Cluster}; use xlineapi::EventType; fn event_type(event_type: i32) -> EventType { @@ -24,7 +21,7 @@ async fn test_watch() -> Result<(), Box> { let mut watch_client = client.watch_client(); let kv_client = client.kv_client(); - let (_watcher, mut stream) = watch_client.watch(WatchRequest::new("foo")).await?; + let (_watcher, mut stream) = watch_client.watch("foo", None).await?; let handle = tokio::spawn(async move { if let Ok(Some(res)) = stream.message().await { let event = res.events.get(0).unwrap(); diff --git a/crates/xlineapi/src/command.rs b/crates/xlineapi/src/command.rs index 28aa44f63..ecbd37231 100644 --- a/crates/xlineapi/src/command.rs +++ b/crates/xlineapi/src/command.rs @@ -131,7 +131,8 @@ impl KeyRange { #[allow(clippy::indexing_slicing)] // end[i] is always valid #[must_use] #[inline] - pub fn get_prefix(key: &[u8]) -> Vec { + pub fn get_prefix(key: impl AsRef<[u8]>) -> Vec { + let key = key.as_ref(); let mut end = key.to_vec(); for i in (0..key.len()).rev() { if key[i] < 0xFF { diff --git a/crates/xlinectl/src/command/watch.rs b/crates/xlinectl/src/command/watch.rs index bdc1fc6f3..16e1a2f76 100644 --- a/crates/xlinectl/src/command/watch.rs +++ b/crates/xlinectl/src/command/watch.rs @@ -6,7 +6,7 @@ use clap::{arg, value_parser, ArgMatches, Command}; use std::process::Command as StdCommand; use xline_client::{ error::XlineClientError, - types::watch::{WatchRequest, Watcher}, + types::watch::{WatchOptions, Watcher}, Client, }; use xlineapi::command::Command as XlineCommand; @@ -38,7 +38,7 @@ pub(crate) fn command() -> Command { } /// a function that builds a watch request with existing fields -type BuildRequestFn = dyn Fn(&str, Option<&str>) 
-> WatchRequest; +type BuildRequestFn = dyn Fn(Option<&str>) -> WatchOptions; /// Build request from matches pub(crate) fn build_request(matches: &ArgMatches) -> Box { @@ -47,8 +47,8 @@ pub(crate) fn build_request(matches: &ArgMatches) -> Box { let pre_kv = matches.get_flag("pre_kv"); let progress_notify = matches.get_flag("progress_notify"); - Box::new(move |key: &str, range_end: Option<&str>| -> WatchRequest { - let mut request = WatchRequest::new(key.as_bytes()); + Box::new(move |range_end: Option<&str>| -> WatchOptions { + let mut request = WatchOptions::default(); if prefix { request = request.with_prefix(); @@ -87,7 +87,7 @@ pub(crate) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result async fn exec_non_interactive(client: &mut Client, matches: &ArgMatches) -> Result<()> { let key = matches.get_one::("key").expect("required"); let range_end = matches.get_one::("range_end"); - let request = build_request(matches)(key, range_end.map(String::as_str)); + let watch_options = build_request(matches)(range_end.map(String::as_str)); // extract the command provided by user let command_to_execute: Vec = matches @@ -96,7 +96,10 @@ async fn exec_non_interactive(client: &mut Client, matches: &ArgMatches) -> Resu .map(OsString::from) .collect(); - let (_watcher, mut stream) = client.watch_client().watch(request).await?; + let (_watcher, mut stream) = client + .watch_client() + .watch(key.as_bytes(), Some(watch_options)) + .await?; while let Some(resp) = stream .message() .await @@ -217,8 +220,11 @@ async fn exec_interactive(client: &mut Client, matches: &ArgMatches) -> Result<( let Some(key) = args.next() else { failed!(line); }; - let request = req_builder(key, args.next()); - let (new_watcher, mut stream) = client.watch_client().watch(request).await?; + let watch_options = req_builder(args.next()); + let (new_watcher, mut stream) = client + .watch_client() + .watch(key.as_bytes(), Some(watch_options)) + .await?; watcher = Some(new_watcher); let _handle 
= tokio::spawn(async move { while let Some(resp) = stream.message().await? { @@ -259,12 +265,21 @@ mod tests { struct TestCase { arg: Vec<&'static str>, - req: Option, + key: String, + req: Option, } impl TestCase { - fn new(arg: Vec<&'static str>, req: Option) -> TestCase { - TestCase { arg, req } + fn new( + arg: Vec<&'static str>, + key: impl Into, + req: Option, + ) -> TestCase { + TestCase { + arg, + key: key.into(), + req, + } } fn run_test(&self) { @@ -282,7 +297,8 @@ mod tests { }; let key = matches.get_one::("key").expect("required"); let range_end = matches.get_one::("range_end"); - let req = build_request(&matches)(key, range_end.map(String::as_str)); + let req = build_request(&matches)(range_end.map(String::as_str)); + assert_eq!(key.to_owned(), self.key); assert_eq!(Some(req), self.req); // Extract the command to execute from the matches let command_to_execute: Vec = matches @@ -314,12 +330,14 @@ mod tests { let test_cases = vec![ TestCase::new( vec!["watch", "key1", "key11"], - Some(WatchRequest::new("key1").with_range_end("key11")), + "key1", + Some(WatchOptions::default().with_range_end("key11")), ), TestCase::new( vec!["watch", "key1", "key11", "--rev", "100", "--pre_kv"], + "key1", Some( - WatchRequest::new("key1") + WatchOptions::default() .with_range_end("key11") .with_start_revision(100) .with_prev_kv(), @@ -327,11 +345,8 @@ mod tests { ), TestCase::new( vec!["watch", "key1", "--prefix", "--progress_notify"], - Some( - WatchRequest::new("key1") - .with_prefix() - .with_progress_notify(), - ), + "key1", + Some(WatchOptions::default().with_prefix().with_progress_notify()), ), // newly added test case: // testing command `-- echo watch event received` @@ -345,11 +360,8 @@ mod tests { "echo", "watch event received", ], - Some( - WatchRequest::new("key1") - .with_prefix() - .with_progress_notify(), - ), + "key1", + Some(WatchOptions::default().with_prefix().with_progress_notify()), ), // newly added test case: // testing command `-- sh -c ls` @@ 
-364,11 +376,8 @@ mod tests { "-c", "ls", ], - Some( - WatchRequest::new("key1") - .with_prefix() - .with_progress_notify(), - ), + "key1", + Some(WatchOptions::default().with_prefix().with_progress_notify()), ), // newly added test case: // testing command `-- sh -c "env | grep XLINE_WATCH_"` @@ -383,11 +392,8 @@ mod tests { "-c", "env | grep XLINE_WATCH_", ], - Some( - WatchRequest::new("key1") - .with_prefix() - .with_progress_notify(), - ), + "key1", + Some(WatchOptions::default().with_prefix().with_progress_notify()), ), ]; From 9833df7ff7a8d0b4fd909fca58023daabf65fd85 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Aug 2024 05:34:33 +0000 Subject: [PATCH 014/322] chore(deps): bump crate-ci/typos from 1.23.5 to 1.23.6 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.23.5 to 1.23.6. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.23.5...v1.23.6) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 17ab73206..a816e8d19 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -112,7 +112,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Check Spelling - uses: crate-ci/typos@v1.23.5 + uses: crate-ci/typos@v1.23.6 build: name: Build From 9898b1f9476eaca280ab82b48ecb4a0be608ac58 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Tue, 16 Jul 2024 10:06:02 +0800 Subject: [PATCH 015/322] refactor(client)!: AuthClient::role_get Signed-off-by: lxl66566 --- crates/xline-client/src/clients/auth.rs | 3 +-- crates/xline-client/tests/it/auth.rs | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/xline-client/src/clients/auth.rs b/crates/xline-client/src/clients/auth.rs index 7134cb763..296c8de85 100644 --- a/crates/xline-client/src/clients/auth.rs +++ b/crates/xline-client/src/clients/auth.rs @@ -14,8 +14,7 @@ use xlineapi::{ use crate::{ error::{Result, XlineClientError}, types::auth::{ - AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGrantPermissionRequest, - AuthRoleRevokePermissionRequest, + AuthRoleDeleteRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, }, AuthService, CurpClient, }; diff --git a/crates/xline-client/tests/it/auth.rs b/crates/xline-client/tests/it/auth.rs index 5d8d86d29..5565ec11f 100644 --- a/crates/xline-client/tests/it/auth.rs +++ b/crates/xline-client/tests/it/auth.rs @@ -2,8 +2,8 @@ use xline_client::{ error::Result, types::auth::{ - AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGrantPermissionRequest, - AuthRoleRevokePermissionRequest, Permission, PermissionType, + AuthRoleDeleteRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, + Permission, PermissionType, }, }; From f6525e04710964abd1af7047b8e7aea18a7f1ce6 Mon Sep 17 
00:00:00 2001 From: lxl66566 Date: Tue, 16 Jul 2024 09:55:16 +0800 Subject: [PATCH 016/322] refactor(client)!: AuthClient::role_add Signed-off-by: lxl66566 --- crates/xline-client/examples/auth_role.rs | 8 +++---- crates/xline-client/src/clients/auth.rs | 11 +++++----- crates/xline-client/src/types/auth.rs | 26 ----------------------- crates/xline-client/tests/it/auth.rs | 10 ++++----- crates/xline-test-utils/src/lib.rs | 6 ++---- crates/xlinectl/src/command/role/add.rs | 22 +++++++++---------- 6 files changed, 28 insertions(+), 55 deletions(-) diff --git a/crates/xline-client/examples/auth_role.rs b/crates/xline-client/examples/auth_role.rs index 39199f1f9..91b2d209b 100644 --- a/crates/xline-client/examples/auth_role.rs +++ b/crates/xline-client/examples/auth_role.rs @@ -1,8 +1,8 @@ use anyhow::Result; use xline_client::{ types::auth::{ - AuthRoleAddRequest, AuthRoleDeleteRequest, AuthRoleGrantPermissionRequest, - AuthRoleRevokePermissionRequest, Permission, PermissionType, + AuthRoleDeleteRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, + Permission, PermissionType, }, Client, ClientOptions, }; @@ -17,8 +17,8 @@ async fn main() -> Result<()> { .auth_client(); // add roles - client.role_add(AuthRoleAddRequest::new("role1")).await?; - client.role_add(AuthRoleAddRequest::new("role2")).await?; + client.role_add("role1").await?; + client.role_add("role2").await?; // grant permissions to roles client diff --git a/crates/xline-client/src/clients/auth.rs b/crates/xline-client/src/clients/auth.rs index 296c8de85..2174fe227 100644 --- a/crates/xline-client/src/clients/auth.rs +++ b/crates/xline-client/src/clients/auth.rs @@ -517,7 +517,6 @@ impl AuthClient { /// # Examples /// /// ```no_run - /// use xline_client::types::auth::AuthRoleAddRequest; /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// @@ -529,19 +528,21 @@ impl AuthClient { /// .await? 
/// .auth_client(); /// - /// client.role_add(AuthRoleAddRequest::new("role")).await?; + /// client.role_add("role").await?; /// /// Ok(()) /// } ///``` #[inline] - pub async fn role_add(&self, request: AuthRoleAddRequest) -> Result { - if request.inner.name.is_empty() { + pub async fn role_add(&self, name: impl Into) -> Result { + let name = name.into(); + if name.is_empty() { return Err(XlineClientError::InvalidArgs(String::from( "role name is empty", ))); } - self.handle_req(request.inner, false).await + self.handle_req(xlineapi::AuthRoleAddRequest { name }, false) + .await } /// Gets role. diff --git a/crates/xline-client/src/types/auth.rs b/crates/xline-client/src/types/auth.rs index 0cd894d41..ad3a93717 100644 --- a/crates/xline-client/src/types/auth.rs +++ b/crates/xline-client/src/types/auth.rs @@ -8,32 +8,6 @@ pub use xlineapi::{ AuthenticateResponse, Type as PermissionType, }; -/// Request for `AuthRoleAdd` -#[derive(Debug, PartialEq)] -pub struct AuthRoleAddRequest { - /// Inner request - pub(crate) inner: xlineapi::AuthRoleAddRequest, -} - -impl AuthRoleAddRequest { - /// Creates a new `AuthRoleAddRequest` - /// - /// `role` is the name of the role to add. 
- #[inline] - pub fn new(role: impl Into) -> Self { - Self { - inner: xlineapi::AuthRoleAddRequest { name: role.into() }, - } - } -} - -impl From for xlineapi::AuthRoleAddRequest { - #[inline] - fn from(req: AuthRoleAddRequest) -> Self { - req.inner - } -} - /// Request for `AuthRoleDelete` #[derive(Debug, PartialEq)] pub struct AuthRoleDeleteRequest { diff --git a/crates/xline-client/tests/it/auth.rs b/crates/xline-client/tests/it/auth.rs index 5565ec11f..caa8749f5 100644 --- a/crates/xline-client/tests/it/auth.rs +++ b/crates/xline-client/tests/it/auth.rs @@ -16,8 +16,8 @@ async fn role_operations_should_success_in_normal_path() -> Result<()> { let role1 = "role1"; let role2 = "role2"; - client.role_add(AuthRoleAddRequest::new(role1)).await?; - client.role_add(AuthRoleAddRequest::new(role2)).await?; + client.role_add(role1).await?; + client.role_add(role2).await?; client.role_get(role1).await?; client.role_get(role2).await?; @@ -53,7 +53,7 @@ async fn permission_operations_should_success_in_normal_path() -> Result<()> { let perm4 = Permission::new(PermissionType::Write, "pp").with_prefix(); let perm5 = Permission::new(PermissionType::Read, vec![0]).with_from_key(); - client.role_add(AuthRoleAddRequest::new(role1)).await?; + client.role_add(role1).await?; client .role_grant_permission(AuthRoleGrantPermissionRequest::new(role1, perm1.clone())) @@ -144,8 +144,8 @@ async fn user_role_operations_should_success_in_normal_path() -> Result<()> { let role2 = "role2"; client.user_add(name1, "", true).await?; - client.role_add(AuthRoleAddRequest::new(role1)).await?; - client.role_add(AuthRoleAddRequest::new(role2)).await?; + client.role_add(role1).await?; + client.role_add(role2).await?; client.user_grant_role(name1, role1).await?; client.user_grant_role(name1, role2).await?; diff --git a/crates/xline-test-utils/src/lib.rs b/crates/xline-test-utils/src/lib.rs index cfa549351..6a293036f 100644 --- a/crates/xline-test-utils/src/lib.rs +++ b/crates/xline-test-utils/src/lib.rs 
@@ -14,9 +14,7 @@ use utils::config::{ LogConfig, MetricsConfig, StorageConfig, TlsConfig, TraceConfig, XlineServerConfig, }; use xline::server::XlineServer; -use xline_client::types::auth::{ - AuthRoleAddRequest, AuthRoleGrantPermissionRequest, Permission, PermissionType, -}; +use xline_client::types::auth::{AuthRoleGrantPermissionRequest, Permission, PermissionType}; pub use xline_client::{clients, types, Client, ClientOptions}; /// Cluster @@ -348,7 +346,7 @@ pub async fn set_user( ) -> Result<(), Box> { let client = client.auth_client(); client.user_add(name, password, false).await?; - client.role_add(AuthRoleAddRequest::new(role)).await?; + client.role_add(role).await?; client.user_grant_role(name, role).await?; if !key.is_empty() { client diff --git a/crates/xlinectl/src/command/role/add.rs b/crates/xlinectl/src/command/role/add.rs index 19dc4a791..50201b54e 100644 --- a/crates/xlinectl/src/command/role/add.rs +++ b/crates/xlinectl/src/command/role/add.rs @@ -1,5 +1,5 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{error::Result, types::auth::AuthRoleAddRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; @@ -11,17 +11,20 @@ pub(super) fn command() -> Command { } /// Build request from matches -pub(super) fn build_request(matches: &ArgMatches) -> AuthRoleAddRequest { +/// +/// # Returns +/// +/// name of the role +pub(super) fn build_request(matches: &ArgMatches) -> String { let name = matches.get_one::("name").expect("required"); - AuthRoleAddRequest::new(name) + name.into() } /// Execute the command pub(super) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { - let req = build_request(matches); - let resp = client.auth_client().role_add(req).await?; + let name = build_request(matches); + let resp = client.auth_client().role_add(name).await?; resp.print(); - Ok(()) } @@ -30,14 +33,11 @@ mod tests { use super::*; use crate::test_case_struct; - 
test_case_struct!(AuthRoleAddRequest); + test_case_struct!(String); #[test] fn command_parse_should_be_valid() { - let test_cases = vec![TestCase::new( - vec!["add", "Admin"], - Some(AuthRoleAddRequest::new("Admin")), - )]; + let test_cases = vec![TestCase::new(vec!["add", "Admin"], Some("Admin".into()))]; for case in test_cases { case.run_test(); From 8e03c92118febfdfff21bcca3516d2bfe84ae996 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Thu, 18 Jul 2024 08:29:12 +0800 Subject: [PATCH 017/322] refactor(client)!: AuthClient::role_delete Signed-off-by: lxl66566 --- crates/xline-client/examples/auth_role.rs | 11 +++------ crates/xline-client/src/clients/auth.rs | 16 +++++-------- crates/xline-client/src/types/auth.rs | 26 ---------------------- crates/xline-client/tests/it/auth.rs | 15 ++++--------- crates/xline/tests/it/auth_test.rs | 8 ++----- crates/xlinectl/src/command/role/delete.rs | 15 ++++++------- 6 files changed, 22 insertions(+), 69 deletions(-) diff --git a/crates/xline-client/examples/auth_role.rs b/crates/xline-client/examples/auth_role.rs index 91b2d209b..70e146f72 100644 --- a/crates/xline-client/examples/auth_role.rs +++ b/crates/xline-client/examples/auth_role.rs @@ -1,8 +1,7 @@ use anyhow::Result; use xline_client::{ types::auth::{ - AuthRoleDeleteRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - Permission, PermissionType, + AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, Permission, PermissionType, }, Client, ClientOptions, }; @@ -55,12 +54,8 @@ async fn main() -> Result<()> { .await?; // delete roles - client - .role_delete(AuthRoleDeleteRequest::new("role1")) - .await?; - client - .role_delete(AuthRoleDeleteRequest::new("role2")) - .await?; + client.role_delete("role1").await?; + client.role_delete("role2").await?; Ok(()) } diff --git a/crates/xline-client/src/clients/auth.rs b/crates/xline-client/src/clients/auth.rs index 2174fe227..b4f845e8a 100644 --- a/crates/xline-client/src/clients/auth.rs +++ 
b/crates/xline-client/src/clients/auth.rs @@ -13,9 +13,7 @@ use xlineapi::{ use crate::{ error::{Result, XlineClientError}, - types::auth::{ - AuthRoleDeleteRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - }, + types::auth::{AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest}, AuthService, CurpClient, }; @@ -626,7 +624,7 @@ impl AuthClient { /// # Examples /// /// ```no_run - /// use xline_client::{types::auth::AuthRoleDeleteRequest, Client, ClientOptions}; + /// use xline_client::{Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -640,18 +638,16 @@ impl AuthClient { /// // add the role /// /// client - /// .role_delete(AuthRoleDeleteRequest::new("role")) + /// .role_delete("role") /// .await?; /// /// Ok(()) /// } ///``` #[inline] - pub async fn role_delete( - &self, - request: AuthRoleDeleteRequest, - ) -> Result { - self.handle_req(request.inner, false).await + pub async fn role_delete(&self, name: impl Into) -> Result { + self.handle_req(xlineapi::AuthRoleDeleteRequest { role: name.into() }, false) + .await } /// Grants role permission. diff --git a/crates/xline-client/src/types/auth.rs b/crates/xline-client/src/types/auth.rs index ad3a93717..ca10dc170 100644 --- a/crates/xline-client/src/types/auth.rs +++ b/crates/xline-client/src/types/auth.rs @@ -8,32 +8,6 @@ pub use xlineapi::{ AuthenticateResponse, Type as PermissionType, }; -/// Request for `AuthRoleDelete` -#[derive(Debug, PartialEq)] -pub struct AuthRoleDeleteRequest { - /// Inner request - pub(crate) inner: xlineapi::AuthRoleDeleteRequest, -} - -impl AuthRoleDeleteRequest { - /// Creates a new `AuthRoleDeleteRequest` - /// - /// `role` is the name of the role to delete. 
- #[inline] - pub fn new(role: impl Into) -> Self { - Self { - inner: xlineapi::AuthRoleDeleteRequest { role: role.into() }, - } - } -} - -impl From for xlineapi::AuthRoleDeleteRequest { - #[inline] - fn from(req: AuthRoleDeleteRequest) -> Self { - req.inner - } -} - /// Request for `AuthRoleGrantPermission` #[derive(Debug, PartialEq)] pub struct AuthRoleGrantPermissionRequest { diff --git a/crates/xline-client/tests/it/auth.rs b/crates/xline-client/tests/it/auth.rs index caa8749f5..dad88d0b5 100644 --- a/crates/xline-client/tests/it/auth.rs +++ b/crates/xline-client/tests/it/auth.rs @@ -2,8 +2,7 @@ use xline_client::{ error::Result, types::auth::{ - AuthRoleDeleteRequest, AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, - Permission, PermissionType, + AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, Permission, PermissionType, }, }; @@ -28,12 +27,8 @@ async fn role_operations_should_success_in_normal_path() -> Result<()> { vec![role1.to_owned(), role2.to_owned()] ); - client - .role_delete(AuthRoleDeleteRequest::new(role1)) - .await?; - client - .role_delete(AuthRoleDeleteRequest::new(role2)) - .await?; + client.role_delete(role1).await?; + client.role_delete(role2).await?; client.role_get(role1).await.unwrap_err(); client.role_get(role2).await.unwrap_err(); @@ -105,9 +100,7 @@ async fn permission_operations_should_success_in_normal_path() -> Result<()> { let role_get_resp = client.role_get(role1).await?; assert!(role_get_resp.perm.is_empty()); - client - .role_delete(AuthRoleDeleteRequest::new(role1)) - .await?; + client.role_delete(role1).await?; Ok(()) } diff --git a/crates/xline/tests/it/auth_test.rs b/crates/xline/tests/it/auth_test.rs index c72a598da..67040269b 100644 --- a/crates/xline/tests/it/auth_test.rs +++ b/crates/xline/tests/it/auth_test.rs @@ -6,9 +6,7 @@ use utils::config::{ TraceConfig, XlineServerConfig, }; use xline_test_utils::{ - enable_auth, set_user, - types::{auth::AuthRoleDeleteRequest, kv::RangeRequest}, - 
Client, ClientOptions, Cluster, + enable_auth, set_user, types::kv::RangeRequest, Client, ClientOptions, Cluster, }; #[tokio::test(flavor = "multi_thread")] @@ -145,9 +143,7 @@ async fn test_role_delete() -> Result<(), Box> { set_user(client, "u", "123", "r", b"foo", &[]).await?; let user = auth_client.user_get("u").await?; assert_eq!(user.roles.len(), 1); - auth_client - .role_delete(AuthRoleDeleteRequest::new("r")) - .await?; + auth_client.role_delete("r").await?; let user = auth_client.user_get("u").await?; assert_eq!(user.roles.len(), 0); diff --git a/crates/xlinectl/src/command/role/delete.rs b/crates/xlinectl/src/command/role/delete.rs index 40b2f533f..de705bc89 100644 --- a/crates/xlinectl/src/command/role/delete.rs +++ b/crates/xlinectl/src/command/role/delete.rs @@ -1,5 +1,5 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{error::Result, types::auth::AuthRoleDeleteRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; @@ -11,9 +11,11 @@ pub(super) fn command() -> Command { } /// Build request from matches -pub(super) fn build_request(matches: &ArgMatches) -> AuthRoleDeleteRequest { +/// +/// Returns the name of the role to be deleted +pub(super) fn build_request(matches: &ArgMatches) -> String { let name = matches.get_one::("name").expect("required"); - AuthRoleDeleteRequest::new(name) + name.to_owned() } /// Execute the command @@ -30,14 +32,11 @@ mod tests { use super::*; use crate::test_case_struct; - test_case_struct!(AuthRoleDeleteRequest); + test_case_struct!(String); #[test] fn command_parse_should_be_valid() { - let test_cases = vec![TestCase::new( - vec!["delete", "Admin"], - Some(AuthRoleDeleteRequest::new("Admin")), - )]; + let test_cases = vec![TestCase::new(vec!["delete", "Admin"], Some("Admin".into()))]; for case in test_cases { case.run_test(); From 686cbe19e560624ca2334bb21f80ae76d9be7f69 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Mon, 29 Jul 2024 09:52:07 +0800 Subject: [PATCH 
018/322] chore(deps): bump dashmap to v6.0.1 Signed-off-by: lxl66566 --- Cargo.lock | 6 ++++-- crates/curp/Cargo.toml | 2 +- crates/utils/Cargo.toml | 2 +- crates/xline/Cargo.toml | 2 +- workspace-hack/Cargo.toml | 1 + 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c41d8bfef..9e7805bff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -803,11 +803,12 @@ dependencies = [ [[package]] name = "dashmap" -version = "5.5.3" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +checksum = "804c8821570c3f8b70230c2ba75ffa5c0f9a4189b9a432b6656c536712acae28" dependencies = [ "cfg-if", + "crossbeam-utils", "hashbrown 0.14.5", "lock_api", "once_cell", @@ -3776,6 +3777,7 @@ dependencies = [ "bytes", "cc", "clap", + "crossbeam-utils", "crypto-common", "digest", "either", diff --git a/crates/curp/Cargo.toml b/crates/curp/Cargo.toml index bcee6671c..25dc3dc29 100644 --- a/crates/curp/Cargo.toml +++ b/crates/curp/Cargo.toml @@ -19,7 +19,7 @@ bytes = "1.4.0" clippy-utilities = "0.2.0" curp-external-api = { path = "../curp-external-api" } curp-test-utils = { path = "../curp-test-utils" } -dashmap = "5.5.0" +dashmap = "6.0.1" derive_builder = "0.20.0" engine = { path = "../engine" } event-listener = "5.3.1" diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index 2bcdd87d3..23b3fb606 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -20,7 +20,7 @@ parking_lot = ["dep:parking_lot"] [dependencies] async-trait = { version = "0.1.80", optional = true } clippy-utilities = "0.2.0" -dashmap = "5.5.3" +dashmap = "6.0.1" derive_builder = "0.20.0" event-listener = "5.3.1" futures = "0.3.30" diff --git a/crates/xline/Cargo.toml b/crates/xline/Cargo.toml index dfeba9304..6383c0a81 100644 --- a/crates/xline/Cargo.toml +++ b/crates/xline/Cargo.toml @@ -23,7 +23,7 @@ crc32fast = "1.4.0" crossbeam-skiplist = "0.1.1" curp = { 
path = "../curp", version = "0.1.0", features = ["client-metrics"] } curp-external-api = { path = "../curp-external-api" } -dashmap = "5.5.3" +dashmap = "6.0.1" engine = { path = "../engine" } event-listener = "5.3.1" futures = "0.3.25" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 6b4d31d24..c4c4905db 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -16,6 +16,7 @@ publish = false axum = { version = "0.6" } bytes = { version = "1" } clap = { version = "4", features = ["derive"] } +crossbeam-utils = { version = "0.8" } crypto-common = { version = "0.1", default-features = false, features = ["std"] } digest = { version = "0.10", features = ["mac", "std"] } either = { version = "1", default-features = false, features = ["use_std"] } From d35ac5fb6f383d85e9b32a185602f72e6f604c52 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Mon, 5 Aug 2024 10:03:18 +0800 Subject: [PATCH 019/322] fix(ci): free disk space to avoid abnormal fail Signed-off-by: lxl66566 test mount and delete Signed-off-by: lxl66566 --- .github/workflows/benchmark.yml | 8 ++---- .github/workflows/build_env.yml | 6 +++- .github/workflows/build_xline.yml | 9 ++---- .github/workflows/pull_request.yml | 46 +++++++++++++++++++----------- .github/workflows/validation.yml | 14 ++++----- Cargo.toml | 1 + ci/Dockerfile | 5 ++-- 7 files changed, 47 insertions(+), 42 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 64dc895d9..8d38ad63e 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -17,12 +17,8 @@ jobs: with: submodules: recursive - - name: Configure sccache - uses: actions/github-script@v7 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); + - name: Setup sccache-cache + uses: mozilla-actions/sccache-action@v0.0.5 call_build_xline: name: Build 
and Upload Artifacts diff --git a/.github/workflows/build_env.yml b/.github/workflows/build_env.yml index d906d8102..6bd756538 100644 --- a/.github/workflows/build_env.yml +++ b/.github/workflows/build_env.yml @@ -1,7 +1,11 @@ name: Build CI Env Image on: - workflow_dispatch: {} + push: + paths: + - "ci/build-env.sh" + - "ci/Dockerfile" + workflow_dispatch: jobs: build_env: diff --git a/.github/workflows/build_xline.yml b/.github/workflows/build_xline.yml index dcb68ef10..754f75eac 100644 --- a/.github/workflows/build_xline.yml +++ b/.github/workflows/build_xline.yml @@ -39,12 +39,9 @@ jobs: with: submodules: recursive - - name: Configure sccache - uses: actions/github-script@v7 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); + - name: Setup sccache-cache + uses: mozilla-actions/sccache-action@v0.0.5 + - name: Prepare release binaries id: prepare_binaries run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index a816e8d19..2d68484c1 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -14,8 +14,15 @@ jobs: run: shell: bash env: - SCCACHE_GHA_ENABLED: "on" - container: ghcr.io/xline-kv/build-env:latest + SCCACHE_GHA_ENABLED: "true" + CARGO_INCREMENTAL: 0 # CI will compile all crates from beginning. So disable incremental compile may reduce compile target size. 
+ container: + image: ghcr.io/xline-kv/build-env:latest + volumes: + - /usr/local/lib/android/:/tmp/android/ + - /usr/share/dotnet:/tmp/dotnet + - /opt/ghc:/tmp/ghc + - /usr/lib/firefox:/tmp/firefox strategy: fail-fast: true matrix: @@ -34,16 +41,25 @@ jobs: } name: Tests ${{ matrix.config.name }} steps: + - name: View free disk space + run: df -h / + + - name: Setup sccache-cache + uses: mozilla-actions/sccache-action@v0.0.5 + - uses: actions/checkout@v4 with: submodules: recursive - - name: Configure sccache - uses: actions/github-script@v7 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); + - name: Free Disk Space + run: | + rm -rf /tmp/android/* || true + rm -rf /tmp/dotnet/* || true + rm -rf /tmp/ghc/* || true + rm -rf /tmp/firefox/* || true + + - name: View free disk space + run: df -h / - name: Trailing spaces check run: ci/scripts/check-trailing-spaces.sh @@ -63,7 +79,7 @@ jobs: - name: Workspace hack check run: cargo hakari generate --diff && cargo hakari manage-deps --dry-run && cargo hakari verify - - run: sccache --zero-stats > /dev/null + - run: ${SCCACHE_PATH} --zero-stats > /dev/null - name: Clippy ${{ matrix.config.name }} env: @@ -71,7 +87,7 @@ jobs: run: cargo clippy ${{ matrix.config.args }} --all-targets --all-features -- -D warnings - name: Sccache stats ${{ matrix.config.name }} - run: sccache --show-stats && sccache --zero-stats > /dev/null + run: ${SCCACHE_PATH} --show-stats && ${SCCACHE_PATH} --zero-stats > /dev/null - name: Test ${{ matrix.config.name }} env: @@ -79,7 +95,7 @@ jobs: run: cargo ${{ matrix.config.test }} - name: Sccache stats ${{ matrix.config.name }} - run: sccache --show-stats + run: ${SCCACHE_PATH} --show-stats - name: Upload coverage to Codecov if: matrix.config.name == 'Normal' @@ -122,12 +138,8 @@ jobs: with: submodules: recursive - - name: Configure sccache - uses: 
actions/github-script@v7 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); + - name: Setup sccache-cache + uses: mozilla-actions/sccache-action@v0.0.5 - name: Build xline image run: | diff --git a/.github/workflows/validation.yml b/.github/workflows/validation.yml index 4e6f4b6e6..cef9c3851 100644 --- a/.github/workflows/validation.yml +++ b/.github/workflows/validation.yml @@ -16,18 +16,14 @@ jobs: with: submodules: recursive - - name: Configure sccache - uses: actions/github-script@v7 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); + - name: Setup sccache-cache + uses: mozilla-actions/sccache-action@v0.0.5 call_build_xline: name: Build and Upload Artifacts uses: ./.github/workflows/build_xline.yml with: - docker_xline_image: 'ghcr.io/xline-kv/build-env:latest' + docker_xline_image: "ghcr.io/xline-kv/build-env:latest" additional_setup_commands: | sudo apt-get install -y --force-yes expect ldd ./xline @@ -35,6 +31,6 @@ jobs: cp ../fixtures/{private,public}.pem . docker build . 
-t ghcr.io/xline-kv/xline:latest docker pull gcr.io/etcd-development/etcd:v3.5.5 - binaries: 'xline,benchmark' - script_name: 'validation_test.sh' + binaries: "xline,benchmark" + script_name: "validation_test.sh" uploadLogs: true diff --git a/Cargo.toml b/Cargo.toml index e0220e105..27e783b45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,3 +27,4 @@ madsim = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update madsim-tonic = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic" } madsim-tonic-build = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic" } madsim-tokio = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic" } + diff --git a/ci/Dockerfile b/ci/Dockerfile index 6c6d2aa1e..ab5ac71e9 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -28,9 +28,7 @@ RUN echo "=== Install rusty stuff 🦀️ ===" && \ rustup component add rustfmt llvm-tools clippy && \ rustup show -v && \ curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash && \ - cargo binstall -y --no-symlinks cargo-llvm-cov cargo-nextest cargo-hakari cargo-sort cargo-cache cargo-audit cargo-machete && \ - cargo install --locked sccache && \ - cargo cache --autoclean && \ + cargo binstall -y --no-symlinks cargo-llvm-cov cargo-nextest cargo-hakari cargo-sort cargo-cache cargo-audit cargo-machete sccache && \ rm -rf "/root/.cargo/registry/index" && \ rm -rf "/root/.cargo/registry/cache" && \ rm -rf "/root/.cargo/git/db" && \ @@ -65,3 +63,4 @@ ENV CARGO_TERM_COLOR=always # Enable sccache ENV RUSTC_WRAPPER="sccache" +ENV SCCACHE_GHA_ENABLED="true" From ac110644b3dd1a8bea893c0581fae52c406d66f3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 05:19:52 +0000 Subject: [PATCH 020/322] chore(deps): bump uuid from 1.9.0 to 1.10.0 Bumps 
[uuid](https://github.com/uuid-rs/uuid) from 1.9.0 to 1.10.0. - [Release notes](https://github.com/uuid-rs/uuid/releases) - [Commits](https://github.com/uuid-rs/uuid/compare/1.9.0...1.10.0) --- updated-dependencies: - dependency-name: uuid dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- crates/xline/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9e7805bff..6ee8ac378 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3445,9 +3445,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea73390fe27785838dcbf75b91b1d84799e28f1ce71e6f372a5dc2200c80de5" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ "getrandom", ] diff --git a/crates/xline/Cargo.toml b/crates/xline/Cargo.toml index 6383c0a81..9c2c1b744 100644 --- a/crates/xline/Cargo.toml +++ b/crates/xline/Cargo.toml @@ -70,7 +70,7 @@ tracing-appender = "0.2" tracing-opentelemetry = "0.23.0" tracing-subscriber = { version = "0.3.16", features = ["env-filter"] } utils = { path = "../utils", features = ["parking_lot"] } -uuid = { version = "1.9.0", features = ["v4"] } +uuid = { version = "1.10.0", features = ["v4"] } workspace-hack = { version = "0.1", path = "../../workspace-hack" } x509-certificate = "0.23.1" xlineapi = { path = "../xlineapi" } From c5991f38cf040d5423198c2eebfafea2b68f0919 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Sun, 21 Jul 2024 22:25:03 +0800 Subject: [PATCH 021/322] refactor(client)!: refactor KvClient::range Signed-off-by: lxl66566 range Signed-off-by: lxl66566 --- crates/benchmark/src/bench_client.rs | 16 ++- crates/simulation/src/xline_group.rs | 15 ++- crates/xline-client/README.md | 5 +- crates/xline-client/examples/kv.rs | 10 +- crates/xline-client/src/clients/kv.rs | 21 ++-- 
crates/xline-client/src/types/kv.rs | 63 +++++------ crates/xline-client/tests/it/kv.rs | 39 ++++--- crates/xline/tests/it/auth_test.rs | 8 +- crates/xline/tests/it/kv_test.rs | 122 ++++++++++++++-------- crates/xline/tests/it/lease_test.rs | 15 ++- crates/xline/tests/it/maintenance_test.rs | 4 +- crates/xlinectl/src/command/get.rs | 71 ++++++++----- crates/xlinectl/src/command/txn.rs | 11 +- 13 files changed, 232 insertions(+), 168 deletions(-) diff --git a/crates/benchmark/src/bench_client.rs b/crates/benchmark/src/bench_client.rs index 6f59cce77..15cdd07a8 100644 --- a/crates/benchmark/src/bench_client.rs +++ b/crates/benchmark/src/bench_client.rs @@ -4,7 +4,7 @@ use anyhow::Result; use etcd_client::{Client as EtcdClient, ConnectOptions}; use thiserror::Error; #[cfg(test)] -use xline_client::types::kv::{RangeRequest, RangeResponse}; +use xline_client::types::kv::{RangeOptions, RangeResponse}; use xline_client::{ error::XlineClientError, types::kv::{PutOptions, PutResponse}, @@ -125,15 +125,16 @@ impl BenchClient { #[cfg(test)] pub(crate) async fn get( &mut self, - request: RangeRequest, + key: impl Into>, + options: Option, ) -> Result { match self.kv_client { KVClient::Xline(ref mut xline_client) => { - let response = xline_client.kv_client().range(request).await?; + let response = xline_client.kv_client().range(key, options).await?; Ok(response) } KVClient::Etcd(ref mut etcd_client) => { - let response = etcd_client.get(request.key(), None).await?; + let response = etcd_client.get(key.into(), None).await?; Ok(convert::get_res(response)) } } @@ -215,7 +216,6 @@ mod convert { #[allow(clippy::unwrap_used)] #[allow(clippy::indexing_slicing)] mod test { - use xline_client::types::kv::RangeRequest; use xline_test_utils::Cluster; use crate::bench_client::{BenchClient, ClientOptions}; @@ -232,8 +232,7 @@ mod test { .unwrap(); //check xline client put value exist let _put_response = client.put("put", "123", None).await; - let range_request = RangeRequest::new("put"); - 
let response = client.get(range_request).await.unwrap(); + let response = client.get("put", None).await.unwrap(); assert_eq!(response.kvs[0].value, b"123"); } @@ -248,8 +247,7 @@ mod test { .unwrap(); let _put_response = client.put("put", "123", None).await; - let range_request = RangeRequest::new("put"); - let response = client.get(range_request).await.unwrap(); + let response = client.get("put", None).await.unwrap(); assert_eq!(response.kvs[0].value, b"123"); } } diff --git a/crates/simulation/src/xline_group.rs b/crates/simulation/src/xline_group.rs index bfa908e24..262642b72 100644 --- a/crates/simulation/src/xline_group.rs +++ b/crates/simulation/src/xline_group.rs @@ -14,7 +14,7 @@ use xline_client::{ types::{ cluster::{MemberAddRequest, MemberAddResponse, MemberListRequest, MemberListResponse}, kv::{ - CompactionRequest, CompactionResponse, PutOptions, PutResponse, RangeRequest, + CompactionRequest, CompactionResponse, PutOptions, PutResponse, RangeOptions, RangeResponse, }, watch::{WatchOptions, WatchStreaming, Watcher}, @@ -189,7 +189,18 @@ impl SimClient { .await .unwrap() } - impl_client_method!(range, kv_client, RangeRequest, RangeResponse); + pub async fn range( + &self, + key: impl Into>, + options: Option, + ) -> Result> { + let client = self.inner.clone(); + let key = key.into(); + self.handle + .spawn(async move { client.kv_client().range(key, options).await }) + .await + .unwrap() + } impl_client_method!(compact, kv_client, CompactionRequest, CompactionResponse); pub async fn watch( &self, diff --git a/crates/xline-client/README.md b/crates/xline-client/README.md index 3147b51e8..930fd1268 100644 --- a/crates/xline-client/README.md +++ b/crates/xline-client/README.md @@ -81,7 +81,7 @@ To create a xline client: ```rust, no_run use xline_client::{ - types::kv::{PutOptions, RangeRequest}, + types::kv::{PutOptions, RangeOptions}, Client, ClientOptions, }; use anyhow::Result; @@ -97,7 +97,8 @@ To create a xline client: client.put("key", "value", 
None).await?; - let resp = client.range(RangeRequest::new("key")).await?; + let resp = client.range("key", None).await?; + // let resp = client.range("key2", Some(RangeOptions::default().with_limit(6))).await?; if let Some(kv) = resp.kvs.first() { println!( diff --git a/crates/xline-client/examples/kv.rs b/crates/xline-client/examples/kv.rs index e30df8f1a..ad71eb5c7 100644 --- a/crates/xline-client/examples/kv.rs +++ b/crates/xline-client/examples/kv.rs @@ -1,8 +1,8 @@ use anyhow::Result; use xline_client::{ types::kv::{ - CompactionRequest, Compare, CompareResult, DeleteRangeRequest, PutOptions, RangeRequest, - TxnOp, TxnRequest, + CompactionRequest, Compare, CompareResult, DeleteRangeRequest, PutOptions, TxnOp, + TxnRequest, }, Client, ClientOptions, }; @@ -21,7 +21,7 @@ async fn main() -> Result<()> { client.put("key2", "value2", None).await?; // range - let resp = client.range(RangeRequest::new("key1")).await?; + let resp = client.range("key1", None).await?; if let Some(kv) = resp.kvs.first() { println!( @@ -54,10 +54,10 @@ async fn main() -> Result<()> { Some(PutOptions::default().with_prev_kv(true)), )][..], ) - .or_else(&[TxnOp::range(RangeRequest::new("key2"))][..]); + .or_else(&[TxnOp::range("key2", None)][..]); let _resp = client.txn(txn_req).await?; - let resp = client.range(RangeRequest::new("key2")).await?; + let resp = client.range("key2", None).await?; // should print "value3" if let Some(kv) = resp.kvs.first() { println!( diff --git a/crates/xline-client/src/clients/kv.rs b/crates/xline-client/src/clients/kv.rs index 2ab36dbc6..be5050559 100644 --- a/crates/xline-client/src/clients/kv.rs +++ b/crates/xline-client/src/clients/kv.rs @@ -8,7 +8,7 @@ use xlineapi::{ use crate::{ error::Result, - types::kv::{CompactionRequest, DeleteRangeRequest, PutOptions, RangeRequest, TxnRequest}, + types::kv::{CompactionRequest, DeleteRangeRequest, PutOptions, RangeOptions, TxnRequest}, AuthService, CurpClient, }; @@ -109,7 +109,7 @@ impl KvClient { /// # Examples 
/// /// ```no_run - /// use xline_client::{types::kv::RangeRequest, Client, ClientOptions}; + /// use xline_client::{types::kv::RangeOptions, Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -120,7 +120,8 @@ impl KvClient { /// .await? /// .kv_client(); /// - /// let resp = client.range(RangeRequest::new("key1")).await?; + /// let resp = client.range("key1", None).await?; + /// let resp = client.range("key2", Some(RangeOptions::default().with_limit(6))).await?; /// /// if let Some(kv) = resp.kvs.first() { /// println!( @@ -134,8 +135,14 @@ impl KvClient { /// } /// ``` #[inline] - pub async fn range(&self, request: RangeRequest) -> Result { - let request = RequestWrapper::from(xlineapi::RangeRequest::from(request)); + pub async fn range( + &self, + key: impl Into>, + options: Option, + ) -> Result { + let request = RequestWrapper::from(xlineapi::RangeRequest::from( + options.unwrap_or_default().with_key(key), + )); let cmd = Command::new(request); let (cmd_res, _sync_res) = self .curp_client @@ -191,7 +198,7 @@ impl KvClient { /// /// ```no_run /// use xline_client::{ - /// types::kv::{Compare, PutOptions, RangeRequest, TxnOp, TxnRequest, CompareResult}, + /// types::kv::{Compare, PutOptions, TxnOp, TxnRequest, CompareResult}, /// Client, ClientOptions, /// }; /// use anyhow::Result; @@ -209,7 +216,7 @@ impl KvClient { /// .and_then( /// &[TxnOp::put("key2", "value3", Some(PutOptions::default().with_prev_kv(true)))][..], /// ) - /// .or_else(&[TxnOp::range(RangeRequest::new("key2"))][..]); + /// .or_else(&[TxnOp::range("key2", None)][..]); /// /// let _resp = client.txn(txn_req).await?; /// diff --git a/crates/xline-client/src/types/kv.rs b/crates/xline-client/src/types/kv.rs index f6f1bc14b..96eb09899 100644 --- a/crates/xline-client/src/types/kv.rs +++ b/crates/xline-client/src/types/kv.rs @@ -4,6 +4,8 @@ pub use xlineapi::{ RangeResponse, Response, ResponseOp, SortOrder, SortTarget, TargetUnion, TxnResponse, }; +use 
super::range_end::RangeOption; + /// Options for `Put`, as same as the `PutRequest` for `Put`. #[derive(Debug, PartialEq, Default)] pub struct PutOptions { @@ -108,37 +110,29 @@ impl From for xlineapi::PutRequest { } } -/// Request type for `Range` -#[derive(Debug, PartialEq)] -pub struct RangeRequest { - /// Inner request +/// Options for `range` function. +#[derive(Debug, PartialEq, Default)] +pub struct RangeOptions { + /// Inner request, RangeRequest = inner + key + range_end inner: xlineapi::RangeRequest, + /// Range end options, indicates how to generate `range_end` from key. + range_end_options: RangeOption, } -impl RangeRequest { - /// Creates a new `RangeRequest` - /// +impl RangeOptions { /// `key` is the first key for the range. If `range_end` is not given, the request only looks up key. #[inline] - pub fn new(key: impl Into>) -> Self { - Self { - inner: xlineapi::RangeRequest { - key: key.into(), - ..Default::default() - }, - } + #[must_use] + pub fn with_key(mut self, key: impl Into>) -> Self { + self.inner.key = key.into(); + self } /// If set, Xline will return all keys with the matching prefix #[inline] #[must_use] pub fn with_prefix(mut self) -> Self { - if self.inner.key.is_empty() { - self.inner.key = vec![0]; - self.inner.range_end = vec![0]; - } else { - self.inner.range_end = KeyRange::get_prefix(&self.inner.key); - } + self.range_end_options = RangeOption::Prefix; self } @@ -146,10 +140,7 @@ impl RangeRequest { #[inline] #[must_use] pub fn with_from_key(mut self) -> Self { - if self.inner.key.is_empty() { - self.inner.key = vec![0]; - } - self.inner.range_end = vec![0]; + self.range_end_options = RangeOption::FromKey; self } @@ -158,7 +149,7 @@ impl RangeRequest { #[inline] #[must_use] pub fn with_range_end(mut self, range_end: impl Into>) -> Self { - self.inner.range_end = range_end.into(); + self.range_end_options = RangeOption::RangeEnd(range_end.into()); self } @@ -263,18 +254,11 @@ impl RangeRequest { self } - /// Get `key` + /// Get 
`range_end_options` #[inline] #[must_use] - pub fn key(&self) -> &[u8] { - &self.inner.key - } - - /// Get `range_end` - #[inline] - #[must_use] - pub fn range_end(&self) -> &[u8] { - &self.inner.range_end + pub fn range_end_options(&self) -> &RangeOption { + &self.range_end_options } /// Get `limit` @@ -355,9 +339,10 @@ impl RangeRequest { } } -impl From for xlineapi::RangeRequest { +impl From for xlineapi::RangeRequest { #[inline] - fn from(req: RangeRequest) -> Self { + fn from(mut req: RangeOptions) -> Self { + req.inner.range_end = req.range_end_options.get_range_end(&mut req.inner.key); req.inner } } @@ -580,9 +565,9 @@ impl TxnOp { /// Creates a `Range` operation. #[inline] #[must_use] - pub fn range(request: RangeRequest) -> Self { + pub fn range(key: impl Into>, option: Option) -> Self { TxnOp { - inner: xlineapi::Request::RequestRange(request.into()), + inner: xlineapi::Request::RequestRange(option.unwrap_or_default().with_key(key).into()), } } diff --git a/crates/xline-client/tests/it/kv.rs b/crates/xline-client/tests/it/kv.rs index 69cf7ac17..9b4287b17 100644 --- a/crates/xline-client/tests/it/kv.rs +++ b/crates/xline-client/tests/it/kv.rs @@ -1,9 +1,10 @@ //! 
The following tests are originally from `etcd-client` + use test_macros::abort_on_panic; use xline_client::{ error::Result, types::kv::{ - CompactionRequest, Compare, CompareResult, DeleteRangeRequest, PutOptions, RangeRequest, + CompactionRequest, Compare, CompareResult, DeleteRangeRequest, PutOptions, RangeOptions, TxnOp, TxnRequest, }, }; @@ -58,7 +59,7 @@ async fn range_should_fetches_previously_put_keys() -> Result<()> { // get key { - let resp = client.range(RangeRequest::new("get11")).await?; + let resp = client.range("get11", None).await?; assert_eq!(resp.count, 1); assert!(!resp.more); assert_eq!(resp.kvs.len(), 1); @@ -69,7 +70,10 @@ async fn range_should_fetches_previously_put_keys() -> Result<()> { // get from key { let resp = client - .range(RangeRequest::new("get11").with_from_key().with_limit(2)) + .range( + "get11", + Some(RangeOptions::default().with_from_key().with_limit(2)), + ) .await?; assert!(resp.more); assert_eq!(resp.kvs.len(), 2); @@ -82,7 +86,7 @@ async fn range_should_fetches_previously_put_keys() -> Result<()> { // get prefix keys { let resp = client - .range(RangeRequest::new("get1").with_prefix()) + .range("get1", Some(RangeOptions::default().with_prefix())) .await?; assert_eq!(resp.count, 2); assert!(!resp.more); @@ -118,7 +122,7 @@ async fn delete_should_remove_previously_put_kvs() -> Result<()> { assert_eq!(&resp.prev_kvs[0].key, "del11".as_bytes()); assert_eq!(&resp.prev_kvs[0].value, "11".as_bytes()); let resp = client - .range(RangeRequest::new("del11").with_count_only(true)) + .range("del11", Some(RangeOptions::default().with_count_only(true))) .await?; assert_eq!(resp.count, 0); } @@ -139,9 +143,12 @@ async fn delete_should_remove_previously_put_kvs() -> Result<()> { assert_eq!(&resp.prev_kvs[1].value, "21".as_bytes()); let resp = client .range( - RangeRequest::new("del11") - .with_range_end("del22") - .with_count_only(true), + "del11", + Some( + RangeOptions::default() + .with_range_end("del22") + .with_count_only(true), + ), 
) .await?; assert_eq!(resp.count, 0); @@ -162,7 +169,7 @@ async fn delete_should_remove_previously_put_kvs() -> Result<()> { assert_eq!(&resp.prev_kvs[1].key, "del32".as_bytes()); assert_eq!(&resp.prev_kvs[1].value, "32".as_bytes()); let resp = client - .range(RangeRequest::new("del32").with_count_only(true)) + .range("del32", Some(RangeOptions::default().with_count_only(true))) .await?; assert_eq!(resp.count, 0); } @@ -191,7 +198,7 @@ async fn txn_should_execute_as_expected() -> Result<()> { Some(PutOptions::default().with_prev_kv(true)), )][..], ) - .or_else(&[TxnOp::range(RangeRequest::new("txn01"))][..]), + .or_else(&[TxnOp::range("txn01", None)][..]), ) .await?; @@ -206,7 +213,7 @@ async fn txn_should_execute_as_expected() -> Result<()> { _ => panic!("expect put response)"), } - let resp = client.range(RangeRequest::new("txn01")).await?; + let resp = client.range("txn01", None).await?; assert_eq!(resp.kvs[0].key, b"txn01"); assert_eq!(resp.kvs[0].value, b"02"); } @@ -218,7 +225,7 @@ async fn txn_should_execute_as_expected() -> Result<()> { TxnRequest::new() .when(&[Compare::value("txn01", CompareResult::Equal, "01")][..]) .and_then(&[TxnOp::put("txn01", "02", None)][..]) - .or_else(&[TxnOp::range(RangeRequest::new("txn01"))][..]), + .or_else(&[TxnOp::range("txn01", None)][..]), ) .await?; @@ -248,11 +255,11 @@ async fn compact_should_remove_previous_revision() -> Result<()> { // before compacting let rev0_resp = client - .range(RangeRequest::new("compact").with_revision(2)) + .range("compact", Some(RangeOptions::default().with_revision(2))) .await?; assert_eq!(rev0_resp.kvs[0].value, b"0"); let rev1_resp = client - .range(RangeRequest::new("compact").with_revision(3)) + .range("compact", Some(RangeOptions::default().with_revision(3))) .await?; assert_eq!(rev1_resp.kvs[0].value, b"1"); @@ -260,14 +267,14 @@ async fn compact_should_remove_previous_revision() -> Result<()> { // after compacting let rev0_resp = client - 
.range(RangeRequest::new("compact").with_revision(2)) + .range("compact", Some(RangeOptions::default().with_revision(2))) .await; assert!( rev0_resp.is_err(), "client.range should receive an err after compaction, but it receives: {rev0_resp:?}" ); let rev1_resp = client - .range(RangeRequest::new("compact").with_revision(3)) + .range("compact", Some(RangeOptions::default().with_revision(3))) .await?; assert_eq!(rev1_resp.kvs[0].value, b"1"); diff --git a/crates/xline/tests/it/auth_test.rs b/crates/xline/tests/it/auth_test.rs index 67040269b..935fcc0c5 100644 --- a/crates/xline/tests/it/auth_test.rs +++ b/crates/xline/tests/it/auth_test.rs @@ -6,7 +6,7 @@ use utils::config::{ TraceConfig, XlineServerConfig, }; use xline_test_utils::{ - enable_auth, set_user, types::kv::RangeRequest, Client, ClientOptions, Cluster, + enable_auth, set_user, types::kv::RangeOptions, Client, ClientOptions, Cluster, }; #[tokio::test(flavor = "multi_thread")] @@ -17,7 +17,7 @@ async fn test_auth_empty_user_get() -> Result<(), Box> { let client = cluster.client().await; enable_auth(client).await?; - let res = client.kv_client().range(RangeRequest::new("foo")).await; + let res = client.kv_client().range("foo", None).await; assert!(res.is_err()); Ok(()) @@ -122,11 +122,11 @@ async fn test_kv_authorization() -> Result<(), Box> { assert!(result.is_err()); let result = u2_client - .range(RangeRequest::new("foo").with_range_end("fox")) + .range("foo", Some(RangeOptions::default().with_range_end("fox"))) .await; assert!(result.is_ok()); let result = u2_client - .range(RangeRequest::new("foo").with_range_end("foz")) + .range("foo", Some(RangeOptions::default().with_range_end("foz"))) .await; assert!(result.is_err()); diff --git a/crates/xline/tests/it/kv_test.rs b/crates/xline/tests/it/kv_test.rs index 367de79c7..5def9f564 100644 --- a/crates/xline/tests/it/kv_test.rs +++ b/crates/xline/tests/it/kv_test.rs @@ -3,7 +3,7 @@ use std::{error::Error, time::Duration}; use test_macros::abort_on_panic; 
use xline_test_utils::{ types::kv::{ - Compare, CompareResult, DeleteRangeRequest, PutOptions, RangeRequest, Response, SortOrder, + Compare, CompareResult, DeleteRangeRequest, PutOptions, RangeOptions, Response, SortOrder, SortTarget, TxnOp, TxnRequest, }, Client, ClientOptions, Cluster, @@ -62,7 +62,8 @@ async fn test_kv_put() -> Result<(), Box> { #[abort_on_panic] async fn test_kv_get() -> Result<(), Box> { struct TestCase<'a> { - req: RangeRequest, + key: Vec, + opt: Option, want_kvs: &'a [&'a str], } @@ -77,82 +78,109 @@ async fn test_kv_get() -> Result<(), Box> { let tests = [ TestCase { - req: RangeRequest::new("a"), + key: "a".into(), + opt: None, want_kvs: &want_kvs[..1], }, TestCase { - req: RangeRequest::new("a").with_serializable(true), + key: "a".into(), + opt: Some(RangeOptions::default().with_serializable(true)), want_kvs: &want_kvs[..1], }, TestCase { - req: RangeRequest::new("a").with_range_end("c"), + key: "a".into(), + opt: Some(RangeOptions::default().with_range_end("c")), want_kvs: &want_kvs[..2], }, TestCase { - req: RangeRequest::new("").with_prefix(), + key: "".into(), + opt: Some(RangeOptions::default().with_prefix()), want_kvs: &want_kvs[..], }, TestCase { - req: RangeRequest::new("").with_from_key(), + key: "".into(), + opt: Some(RangeOptions::default().with_from_key()), want_kvs: &want_kvs[..], }, TestCase { - req: RangeRequest::new("a").with_range_end("x"), + key: "a".into(), + opt: Some(RangeOptions::default().with_range_end("x")), want_kvs: &want_kvs[..], }, TestCase { - req: RangeRequest::new("").with_prefix().with_revision(4), + key: "".into(), + opt: Some(RangeOptions::default().with_prefix().with_revision(4)), want_kvs: &want_kvs[..3], }, TestCase { - req: RangeRequest::new("a").with_count_only(true), + key: "a".into(), + opt: Some(RangeOptions::default().with_count_only(true)), want_kvs: &[], }, TestCase { - req: RangeRequest::new("foo").with_prefix(), + key: "foo".into(), + opt: Some(RangeOptions::default().with_prefix()), 
want_kvs: &["foo", "foo/abc"], }, TestCase { - req: RangeRequest::new("foo").with_from_key(), + key: "foo".into(), + opt: Some(RangeOptions::default().with_from_key()), want_kvs: &["foo", "foo/abc", "fop"], }, TestCase { - req: RangeRequest::new("").with_prefix().with_limit(2), + key: "".into(), + opt: Some(RangeOptions::default().with_prefix().with_limit(2)), want_kvs: &want_kvs[..2], }, TestCase { - req: RangeRequest::new("") - .with_prefix() - .with_sort_target(SortTarget::Mod) - .with_sort_order(SortOrder::Ascend), + key: "".into(), + opt: Some( + RangeOptions::default() + .with_prefix() + .with_sort_order(SortOrder::Descend) + .with_sort_order(SortOrder::Ascend), + ), want_kvs: &want_kvs[..], }, TestCase { - req: RangeRequest::new("") - .with_prefix() - .with_sort_target(SortTarget::Version) - .with_sort_order(SortOrder::Ascend), + key: "".into(), + opt: Some( + RangeOptions::default() + .with_prefix() + .with_sort_target(SortTarget::Version) + .with_sort_order(SortOrder::Ascend), + ), + want_kvs: &kvs_by_version[..], }, TestCase { - req: RangeRequest::new("") - .with_prefix() - .with_sort_target(SortTarget::Create) - .with_sort_order(SortOrder::None), + key: "".into(), + opt: Some( + RangeOptions::default() + .with_prefix() + .with_sort_target(SortTarget::Create) + .with_sort_order(SortOrder::None), + ), want_kvs: &want_kvs[..], }, TestCase { - req: RangeRequest::new("") - .with_prefix() - .with_sort_target(SortTarget::Create) - .with_sort_order(SortOrder::Descend), + key: "".into(), + opt: Some( + RangeOptions::default() + .with_prefix() + .with_sort_target(SortTarget::Create) + .with_sort_order(SortOrder::Descend), + ), want_kvs: &reversed_kvs[..], }, TestCase { - req: RangeRequest::new("") - .with_prefix() - .with_sort_target(SortTarget::Key) - .with_sort_order(SortOrder::Descend), + key: "".into(), + opt: Some( + RangeOptions::default() + .with_prefix() + .with_sort_target(SortTarget::Key) + .with_sort_order(SortOrder::Descend), + ), want_kvs: 
&reversed_kvs[..], }, ]; @@ -162,7 +190,7 @@ async fn test_kv_get() -> Result<(), Box> { } for test in tests { - let res = client.range(test.req).await?; + let res = client.range(test.key, test.opt).await?; assert_eq!(res.kvs.len(), test.want_kvs.len()); let is_identical = res .kvs @@ -187,7 +215,7 @@ async fn test_range_redirect() -> Result<(), Box> { .kv_client(); let _ignore = kv_client.put("foo", "bar", None).await?; tokio::time::sleep(Duration::from_millis(300)).await; - let res = kv_client.range(RangeRequest::new("foo")).await?; + let res = kv_client.range("foo", None).await?; assert_eq!(res.kvs.len(), 1); assert_eq!(res.kvs[0].value, b"bar"); @@ -255,7 +283,9 @@ async fn test_kv_delete() -> Result<(), Box> { let res = client.delete(test.req).await?; assert_eq!(res.deleted, test.want_deleted); - let res = client.range(RangeRequest::new("").with_prefix()).await?; + let res = client + .range("", Some(RangeOptions::default().with_prefix())) + .await?; let is_identical = res .kvs .iter() @@ -282,7 +312,7 @@ async fn test_txn() -> Result<(), Box> { let read_write_txn_req = TxnRequest::new() .when(&[Compare::value("b", CompareResult::Equal, "bar")][..]) .and_then(&[TxnOp::put("f", "foo", None)][..]) - .or_else(&[TxnOp::range(RangeRequest::new("a"))][..]); + .or_else(&[TxnOp::range("a", None)][..]); let res = client.txn(read_write_txn_req).await?; assert!(res.succeeded); @@ -294,8 +324,8 @@ async fn test_txn() -> Result<(), Box> { let read_only_txn = TxnRequest::new() .when(&[Compare::version("b", CompareResult::Greater, 10)][..]) - .and_then(&[TxnOp::range(RangeRequest::new("a"))][..]) - .or_else(&[TxnOp::range(RangeRequest::new("b"))][..]); + .and_then(&[TxnOp::range("a", None)][..]) + .or_else(&[TxnOp::range("b", None)][..]); let mut res = client.txn(read_only_txn).await?; assert!(!res.succeeded); assert_eq!(res.responses.len(), 1); @@ -317,8 +347,18 @@ async fn test_txn() -> Result<(), Box> { let serializable_txn = TxnRequest::new() .when([]) - 
.and_then(&[TxnOp::range(RangeRequest::new("c").with_serializable(true))][..]) - .or_else(&[TxnOp::range(RangeRequest::new("d").with_serializable(true))][..]); + .and_then( + &[TxnOp::range( + "c", + Some(RangeOptions::default().with_serializable(true)), + )][..], + ) + .or_else( + &[TxnOp::range( + "d", + Some(RangeOptions::default().with_serializable(true)), + )][..], + ); let mut res = client.txn(serializable_txn).await?; assert!(res.succeeded); assert_eq!(res.responses.len(), 1); diff --git a/crates/xline/tests/it/lease_test.rs b/crates/xline/tests/it/lease_test.rs index df1bda72e..ca4b8b67f 100644 --- a/crates/xline/tests/it/lease_test.rs +++ b/crates/xline/tests/it/lease_test.rs @@ -2,10 +2,7 @@ use std::{error::Error, time::Duration}; use test_macros::abort_on_panic; use tracing::info; -use xline_test_utils::{ - types::kv::{PutOptions, RangeRequest}, - Client, ClientOptions, Cluster, -}; +use xline_test_utils::{types::kv::PutOptions, Client, ClientOptions, Cluster}; #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] @@ -26,13 +23,13 @@ async fn test_lease_expired() -> Result<(), Box> { Some(PutOptions::default().with_lease(lease_id)), ) .await?; - let res = client.kv_client().range(RangeRequest::new("foo")).await?; + let res = client.kv_client().range("foo", None).await?; assert_eq!(res.kvs.len(), 1); assert_eq!(res.kvs[0].value, b"bar"); tokio::time::sleep(Duration::from_secs(3)).await; - let res = client.kv_client().range(RangeRequest::new("foo")).await?; + let res = client.kv_client().range("foo", None).await?; assert_eq!(res.kvs.len(), 0); Ok(()) @@ -58,7 +55,7 @@ async fn test_lease_keep_alive() -> Result<(), Box> { Some(PutOptions::default().with_lease(lease_id)), ) .await?; - let res = client.kv_client().range(RangeRequest::new("foo")).await?; + let res = client.kv_client().range("foo", None).await?; assert_eq!(res.kvs.len(), 1); assert_eq!(res.kvs[0].value, b"bar"); @@ -77,13 +74,13 @@ async fn test_lease_keep_alive() -> Result<(), Box> { }); 
tokio::time::sleep(Duration::from_secs(3)).await; - let res = client.kv_client().range(RangeRequest::new("foo")).await?; + let res = client.kv_client().range("foo", None).await?; assert_eq!(res.kvs.len(), 1); assert_eq!(res.kvs[0].value, b"bar"); handle.abort(); tokio::time::sleep(Duration::from_secs(2)).await; - let res = client.kv_client().range(RangeRequest::new("foo")).await?; + let res = client.kv_client().range("foo", None).await?; assert_eq!(res.kvs.len(), 0); Ok(()) diff --git a/crates/xline/tests/it/maintenance_test.rs b/crates/xline/tests/it/maintenance_test.rs index cc1dc1100..eb0f5ed0e 100644 --- a/crates/xline/tests/it/maintenance_test.rs +++ b/crates/xline/tests/it/maintenance_test.rs @@ -5,7 +5,7 @@ use tokio::io::AsyncWriteExt; #[cfg(test)] use xline::restore::restore; use xline_client::error::XlineClientError; -use xline_test_utils::{types::kv::RangeRequest, Client, ClientOptions, Cluster}; +use xline_test_utils::{Client, ClientOptions, Cluster}; use xlineapi::{execute_error::ExecuteError, AlarmAction, AlarmRequest, AlarmType}; #[tokio::test(flavor = "multi_thread")] @@ -42,7 +42,7 @@ async fn test_snapshot_and_restore() -> Result<(), Box> { let mut new_cluster = Cluster::new_with_configs(restore_cluster_configs).await; new_cluster.start().await; let client = new_cluster.client().await.kv_client(); - let res = client.range(RangeRequest::new("key")).await?; + let res = client.range("key", None).await?; assert_eq!(res.kvs.len(), 1); assert_eq!(res.kvs[0].key, b"key"); assert_eq!(res.kvs[0].value, b"value"); diff --git a/crates/xlinectl/src/command/get.rs b/crates/xlinectl/src/command/get.rs index 0feaad007..d7ed32ec7 100644 --- a/crates/xlinectl/src/command/get.rs +++ b/crates/xlinectl/src/command/get.rs @@ -1,10 +1,13 @@ use anyhow::Result; use clap::{arg, value_parser, ArgMatches, Command}; -use xline_client::{types::kv::RangeRequest, Client}; +use xline_client::{types::kv::RangeOptions, Client}; use xlineapi::{SortOrder, SortTarget}; use 
crate::utils::printer::Printer; +/// Temp struct for building command, indicates `(key, rangeoptions)` +type RangeRequest = (Vec, RangeOptions); + /// Definition of `get` command pub(crate) fn command() -> Command { Command::new("get") @@ -66,24 +69,24 @@ pub(crate) fn build_request(matches: &ArgMatches) -> RangeRequest { let keys_only = matches.get_flag("keys_only"); let count_only = matches.get_flag("count_only"); - let mut request = RangeRequest::new(key.as_bytes()); + let mut options = RangeOptions::default(); if let Some(range_end) = range_end { - request = request.with_range_end(range_end.as_bytes()); + options = options.with_range_end(range_end.as_bytes()); } - request = match consistency.as_str() { - "L" => request.with_serializable(false), - "S" => request.with_serializable(true), + options = match consistency.as_str() { + "L" => options.with_serializable(false), + "S" => options.with_serializable(true), _ => unreachable!("The format should be checked by Clap."), }; if let Some(order) = order { - request = request.with_sort_order(match order.as_str() { + options = options.with_sort_order(match order.as_str() { "ASCEND" => SortOrder::Ascend, "DESCEND" => SortOrder::Descend, _ => unreachable!("The format should be checked by Clap."), }); } if let Some(sort_by) = sort_by { - request = request.with_sort_target(match sort_by.as_str() { + options = options.with_sort_target(match sort_by.as_str() { "CREATE" => SortTarget::Create, "KEY" => SortTarget::Key, "MODIFY" => SortTarget::Mod, @@ -92,24 +95,24 @@ pub(crate) fn build_request(matches: &ArgMatches) -> RangeRequest { _ => unreachable!("The format should be checked by Clap."), }); } - request = request.with_limit(*limit); + options = options.with_limit(*limit); if prefix { - request = request.with_prefix(); + options = options.with_prefix(); } if from_key { - request = request.with_from_key(); + options = options.with_from_key(); } - request = request.with_revision(*rev); - request = 
request.with_keys_only(keys_only); - request = request.with_count_only(count_only); + options = options.with_revision(*rev); + options = options.with_keys_only(keys_only); + options = options.with_count_only(count_only); - request + (key.as_bytes().to_vec(), options) } /// Execute the command pub(crate) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { - let req = build_request(matches); - let resp = client.kv_client().range(req).await?; + let (key, options) = build_request(matches); + let resp = client.kv_client().range(key, Some(options)).await?; resp.print(); Ok(()) @@ -127,47 +130,59 @@ mod tests { let test_cases = vec![ TestCase::new( vec!["get", "key"], - Some(RangeRequest::new("key".as_bytes())), + Some(("key".into(), RangeOptions::default())), ), TestCase::new( vec!["get", "key", "key2"], - Some(RangeRequest::new("key".as_bytes()).with_range_end("key2".as_bytes())), + Some(( + "key".into(), + RangeOptions::default().with_range_end("key2".as_bytes()), + )), ), TestCase::new( vec!["get", "key", "--consistency", "L"], - Some(RangeRequest::new("key".as_bytes()).with_serializable(false)), + Some(( + "key".into(), + RangeOptions::default().with_serializable(false), + )), ), TestCase::new( vec!["get", "key", "--order", "DESCEND"], - Some(RangeRequest::new("key".as_bytes()).with_sort_order(SortOrder::Descend)), + Some(( + "key".into(), + RangeOptions::default().with_sort_order(SortOrder::Descend), + )), ), TestCase::new( vec!["get", "key", "--sort_by", "MODIFY"], - Some(RangeRequest::new("key".as_bytes()).with_sort_target(SortTarget::Mod)), + Some(( + "key".into(), + RangeOptions::default().with_sort_target(SortTarget::Mod), + )), ), TestCase::new( vec!["get", "key", "--limit", "10"], - Some(RangeRequest::new("key".as_bytes()).with_limit(10)), + Some(("key".into(), RangeOptions::default().with_limit(10))), ), TestCase::new( vec!["get", "key", "--prefix"], - Some(RangeRequest::new("key".as_bytes()).with_prefix()), + Some(("key".into(), 
RangeOptions::default().with_prefix())), ), TestCase::new( vec!["get", "key", "--from_key"], - Some(RangeRequest::new("key".as_bytes()).with_from_key()), + Some(("key".into(), RangeOptions::default().with_from_key())), ), TestCase::new( vec!["get", "key", "--rev", "5"], - Some(RangeRequest::new("key".as_bytes()).with_revision(5)), + Some(("key".into(), RangeOptions::default().with_revision(5))), ), TestCase::new( vec!["get", "key", "--keys_only"], - Some(RangeRequest::new("key".as_bytes()).with_keys_only(true)), + Some(("key".into(), RangeOptions::default().with_keys_only(true))), ), TestCase::new( vec!["get", "key", "--count_only"], - Some(RangeRequest::new("key".as_bytes()).with_count_only(true)), + Some(("key".into(), RangeOptions::default().with_count_only(true))), ), ]; diff --git a/crates/xlinectl/src/command/txn.rs b/crates/xlinectl/src/command/txn.rs index 4620e913c..cb4a311be 100644 --- a/crates/xlinectl/src/command/txn.rs +++ b/crates/xlinectl/src/command/txn.rs @@ -145,7 +145,7 @@ fn parse_op_line(line: &str) -> Result { "get" => { let matches = get_cmd.try_get_matches_from(args.clone())?; let req = get::build_request(&matches); - Ok(TxnOp::range(req)) + Ok(TxnOp::range(req.0, Some(req.1))) } "delete" => { let matches = delete_cmd.try_get_matches_from(args.clone())?; @@ -167,7 +167,7 @@ pub(crate) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result #[cfg(test)] mod tests { - use xline_client::types::kv::RangeRequest; + use xline_client::types::kv::RangeOptions; use super::*; @@ -191,11 +191,14 @@ mod tests { ); assert_eq!( parse_op_line(r"get key1 key11").unwrap(), - TxnOp::range(RangeRequest::new("key1").with_range_end("key11")) + TxnOp::range( + "key1", + Some(RangeOptions::default().with_range_end("key11")) + ) ); assert_eq!( parse_op_line(r"get key1 --from_key").unwrap(), - TxnOp::range(RangeRequest::new("key1").with_from_key()) + TxnOp::range("key1", Some(RangeOptions::default().with_from_key())) ); } } From 
7be4b4278526150417fb3415f6a38766f47cdb48 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Mon, 22 Jul 2024 15:32:58 +0800 Subject: [PATCH 022/322] refactor(client)!: refactor KvClient::delete Signed-off-by: lxl66566 --- crates/xline-client/examples/kv.rs | 7 ++- crates/xline-client/src/clients/kv.rs | 16 +++++-- crates/xline-client/src/types/kv.rs | 67 ++++++++++----------------- crates/xline-client/tests/it/kv.rs | 25 ++++++---- crates/xline/tests/it/kv_test.rs | 28 +++++++---- crates/xline/tests/it/watch_test.rs | 4 +- crates/xlinectl/src/command/delete.rs | 35 ++++++++------ crates/xlinectl/src/command/txn.rs | 2 +- 8 files changed, 100 insertions(+), 84 deletions(-) diff --git a/crates/xline-client/examples/kv.rs b/crates/xline-client/examples/kv.rs index ad71eb5c7..6cd6f5d06 100644 --- a/crates/xline-client/examples/kv.rs +++ b/crates/xline-client/examples/kv.rs @@ -1,7 +1,7 @@ use anyhow::Result; use xline_client::{ types::kv::{ - CompactionRequest, Compare, CompareResult, DeleteRangeRequest, PutOptions, TxnOp, + CompactionRequest, Compare, CompareResult, DeleteRangeOptions, PutOptions, TxnOp, TxnRequest, }, Client, ClientOptions, @@ -33,7 +33,10 @@ async fn main() -> Result<()> { // delete let resp = client - .delete(DeleteRangeRequest::new("key1").with_prev_kv(true)) + .delete( + "key1", + Some(DeleteRangeOptions::default().with_prev_kv(true)), + ) .await?; for kv in resp.prev_kvs { diff --git a/crates/xline-client/src/clients/kv.rs b/crates/xline-client/src/clients/kv.rs index be5050559..726e15bd5 100644 --- a/crates/xline-client/src/clients/kv.rs +++ b/crates/xline-client/src/clients/kv.rs @@ -8,7 +8,7 @@ use xlineapi::{ use crate::{ error::Result, - types::kv::{CompactionRequest, DeleteRangeRequest, PutOptions, RangeOptions, TxnRequest}, + types::kv::{CompactionRequest, DeleteRangeOptions, PutOptions, RangeOptions, TxnRequest}, AuthService, CurpClient, }; @@ -159,7 +159,7 @@ impl KvClient { /// /// # Examples /// ```no_run - /// use 
xline_client::{types::kv::DeleteRangeRequest, Client, ClientOptions}; + /// use xline_client::{types::kv::DeleteRangeOptions, Client, ClientOptions}; /// use anyhow::Result; /// /// #[tokio::main] @@ -171,15 +171,21 @@ impl KvClient { /// .kv_client(); /// /// client - /// .delete(DeleteRangeRequest::new("key1").with_prev_kv(true)) + /// .delete("key1", Some(DeleteRangeOptions::default().with_prev_kv(true))) /// .await?; /// /// Ok(()) /// } /// ``` #[inline] - pub async fn delete(&self, request: DeleteRangeRequest) -> Result { - let request = RequestWrapper::from(xlineapi::DeleteRangeRequest::from(request)); + pub async fn delete( + &self, + key: impl Into>, + options: Option, + ) -> Result { + let request = RequestWrapper::from(xlineapi::DeleteRangeRequest::from( + options.unwrap_or_default().with_key(key), + )); let cmd = Command::new(request); let (cmd_res, _sync_res) = self .curp_client diff --git a/crates/xline-client/src/types/kv.rs b/crates/xline-client/src/types/kv.rs index 96eb09899..ec50bd973 100644 --- a/crates/xline-client/src/types/kv.rs +++ b/crates/xline-client/src/types/kv.rs @@ -348,36 +348,28 @@ impl From for xlineapi::RangeRequest { } /// Request type for `DeleteRange` -#[derive(Debug, PartialEq)] -pub struct DeleteRangeRequest { +#[derive(Debug, PartialEq, Default)] +pub struct DeleteRangeOptions { /// Inner request inner: xlineapi::DeleteRangeRequest, + /// Range end options + range_end_options: RangeOption, } -impl DeleteRangeRequest { - /// Creates a new `DeleteRangeRequest` - /// - /// `key` is the first key to delete in the range. +impl DeleteRangeOptions { + /// `key` is the first key for the range. If `range_end` is not given, the request only looks up key. 
#[inline] - pub fn new(key: impl Into>) -> Self { - Self { - inner: xlineapi::DeleteRangeRequest { - key: key.into(), - ..Default::default() - }, - } + #[must_use] + pub fn with_key(mut self, key: impl Into>) -> Self { + self.inner.key = key.into(); + self } /// If set, Xline will delete all keys with the matching prefix #[inline] #[must_use] pub fn with_prefix(mut self) -> Self { - if self.inner.key.is_empty() { - self.inner.key = vec![0]; - self.inner.range_end = vec![0]; - } else { - self.inner.range_end = KeyRange::get_prefix(&self.inner.key); - } + self.range_end_options = RangeOption::Prefix; self } @@ -385,22 +377,15 @@ impl DeleteRangeRequest { #[inline] #[must_use] pub fn with_from_key(mut self) -> Self { - if self.inner.key.is_empty() { - self.inner.key = vec![0]; - } - self.inner.range_end = vec![0]; + self.range_end_options = RangeOption::FromKey; self } - /// `range_end` is the key following the last key to delete for the range \[key,` range_en`d). - /// If `range_end` is not given, the range is defined to contain only the key argument. - /// If `range_end` is one bit larger than the given key, then the range is all the keys - /// with the prefix (the given key). - /// If `range_end` is '\0', the range is all keys greater than or equal to the key argument. + /// If set, Xline will delete all keys in range `[key, range_end)`. 
#[inline] #[must_use] pub fn with_range_end(mut self, range_end: impl Into>) -> Self { - self.inner.range_end = range_end.into(); + self.range_end_options = RangeOption::RangeEnd(range_end.into()); self } @@ -413,18 +398,11 @@ impl DeleteRangeRequest { self } - /// Get `key` - #[inline] - #[must_use] - pub fn key(&self) -> &[u8] { - &self.inner.key - } - - /// Get `range_end` + /// Get `range_end_options` #[inline] #[must_use] - pub fn range_end(&self) -> &[u8] { - &self.inner.range_end + pub fn range_end_options(&self) -> &RangeOption { + &self.range_end_options } /// Get `prev_kv` @@ -435,9 +413,10 @@ impl DeleteRangeRequest { } } -impl From for xlineapi::DeleteRangeRequest { +impl From for xlineapi::DeleteRangeRequest { #[inline] - fn from(req: DeleteRangeRequest) -> Self { + fn from(mut req: DeleteRangeOptions) -> Self { + req.inner.range_end = req.range_end_options.get_range_end(&mut req.inner.key); req.inner } } @@ -574,9 +553,11 @@ impl TxnOp { /// Creates a `DeleteRange` operation. 
#[inline] #[must_use] - pub fn delete(request: DeleteRangeRequest) -> Self { + pub fn delete(key: impl Into>, option: Option) -> Self { TxnOp { - inner: xlineapi::Request::RequestDeleteRange(request.into()), + inner: xlineapi::Request::RequestDeleteRange( + option.unwrap_or_default().with_key(key).into(), + ), } } diff --git a/crates/xline-client/tests/it/kv.rs b/crates/xline-client/tests/it/kv.rs index 9b4287b17..588736e3e 100644 --- a/crates/xline-client/tests/it/kv.rs +++ b/crates/xline-client/tests/it/kv.rs @@ -4,7 +4,7 @@ use test_macros::abort_on_panic; use xline_client::{ error::Result, types::kv::{ - CompactionRequest, Compare, CompareResult, DeleteRangeRequest, PutOptions, RangeOptions, + CompactionRequest, Compare, CompareResult, DeleteRangeOptions, PutOptions, RangeOptions, TxnOp, TxnRequest, }, }; @@ -116,7 +116,10 @@ async fn delete_should_remove_previously_put_kvs() -> Result<()> { // delete key { let resp = client - .delete(DeleteRangeRequest::new("del11").with_prev_kv(true)) + .delete( + "del11", + Some(DeleteRangeOptions::default().with_prev_kv(true)), + ) .await?; assert_eq!(resp.deleted, 1); assert_eq!(&resp.prev_kvs[0].key, "del11".as_bytes()); @@ -131,9 +134,12 @@ async fn delete_should_remove_previously_put_kvs() -> Result<()> { { let resp = client .delete( - DeleteRangeRequest::new("del11") - .with_range_end("del22") - .with_prev_kv(true), + "del11", + Some( + DeleteRangeOptions::default() + .with_range_end("del22") + .with_prev_kv(true), + ), ) .await?; assert_eq!(resp.deleted, 2); @@ -158,9 +164,12 @@ async fn delete_should_remove_previously_put_kvs() -> Result<()> { { let resp = client .delete( - DeleteRangeRequest::new("del3") - .with_prefix() - .with_prev_kv(true), + "del3", + Some( + DeleteRangeOptions::default() + .with_prefix() + .with_prev_kv(true), + ), ) .await?; assert_eq!(resp.deleted, 2); diff --git a/crates/xline/tests/it/kv_test.rs b/crates/xline/tests/it/kv_test.rs index 5def9f564..4188fb91d 100644 --- 
a/crates/xline/tests/it/kv_test.rs +++ b/crates/xline/tests/it/kv_test.rs @@ -3,7 +3,7 @@ use std::{error::Error, time::Duration}; use test_macros::abort_on_panic; use xline_test_utils::{ types::kv::{ - Compare, CompareResult, DeleteRangeRequest, PutOptions, RangeOptions, Response, SortOrder, + Compare, CompareResult, DeleteRangeOptions, PutOptions, RangeOptions, Response, SortOrder, SortTarget, TxnOp, TxnRequest, }, Client, ClientOptions, Cluster, @@ -226,7 +226,8 @@ async fn test_range_redirect() -> Result<(), Box> { #[abort_on_panic] async fn test_kv_delete() -> Result<(), Box> { struct TestCase<'a> { - req: DeleteRangeRequest, + key: Vec, + opt: Option, want_deleted: i64, want_keys: &'a [&'a str], } @@ -239,37 +240,44 @@ async fn test_kv_delete() -> Result<(), Box> { let tests = [ TestCase { - req: DeleteRangeRequest::new("").with_prefix(), + key: "".into(), + opt: Some(DeleteRangeOptions::default().with_prefix()), want_deleted: 5, want_keys: &[], }, TestCase { - req: DeleteRangeRequest::new("").with_from_key(), + key: "".into(), + opt: Some(DeleteRangeOptions::default().with_from_key()), want_deleted: 5, want_keys: &[], }, TestCase { - req: DeleteRangeRequest::new("a").with_range_end("c"), + key: "a".into(), + opt: Some(DeleteRangeOptions::default().with_range_end("c")), want_deleted: 2, want_keys: &["c", "c/abc", "d"], }, TestCase { - req: DeleteRangeRequest::new("c"), + key: "c".into(), + opt: None, want_deleted: 1, want_keys: &["a", "b", "c/abc", "d"], }, TestCase { - req: DeleteRangeRequest::new("c").with_prefix(), + key: "c".into(), + opt: Some(DeleteRangeOptions::default().with_prefix()), want_deleted: 2, want_keys: &["a", "b", "d"], }, TestCase { - req: DeleteRangeRequest::new("c").with_from_key(), + key: "c".into(), + opt: Some(DeleteRangeOptions::default().with_from_key()), want_deleted: 3, want_keys: &["a", "b"], }, TestCase { - req: DeleteRangeRequest::new("e"), + key: "e".into(), + opt: None, want_deleted: 0, want_keys: &keys, }, @@ -280,7 +288,7 
@@ async fn test_kv_delete() -> Result<(), Box> { client.put(key, "bar", None).await?; } - let res = client.delete(test.req).await?; + let res = client.delete(test.key, test.opt).await?; assert_eq!(res.deleted, test.want_deleted); let res = client diff --git a/crates/xline/tests/it/watch_test.rs b/crates/xline/tests/it/watch_test.rs index 4d2e8b80c..43d0a67cc 100644 --- a/crates/xline/tests/it/watch_test.rs +++ b/crates/xline/tests/it/watch_test.rs @@ -1,7 +1,7 @@ use std::error::Error; use test_macros::abort_on_panic; -use xline_test_utils::{types::kv::DeleteRangeRequest, Cluster}; +use xline_test_utils::Cluster; use xlineapi::EventType; fn event_type(event_type: i32) -> EventType { @@ -40,7 +40,7 @@ async fn test_watch() -> Result<(), Box> { }); kv_client.put("foo", "bar", None).await?; - kv_client.delete(DeleteRangeRequest::new("foo")).await?; + kv_client.delete("foo", None).await?; handle.await?; diff --git a/crates/xlinectl/src/command/delete.rs b/crates/xlinectl/src/command/delete.rs index 2f7577229..689454023 100644 --- a/crates/xlinectl/src/command/delete.rs +++ b/crates/xlinectl/src/command/delete.rs @@ -1,9 +1,12 @@ use anyhow::Result; use clap::{arg, ArgMatches, Command}; -use xline_client::{types::kv::DeleteRangeRequest, Client}; +use xline_client::{types::kv::DeleteRangeOptions, Client}; use crate::utils::printer::Printer; +/// temp type to pass `(key, delete range options)` +type DeleteRangeRequest = (String, DeleteRangeOptions); + /// Definition of `delete` command pub(crate) fn command() -> Command { Command::new("delete") @@ -32,25 +35,25 @@ pub(crate) fn build_request(matches: &ArgMatches) -> DeleteRangeRequest { let prev_kv = matches.get_flag("prev_kv"); let from_key = matches.get_flag("from_key"); - let mut request = DeleteRangeRequest::new(key.as_bytes()); + let mut options = DeleteRangeOptions::default(); if let Some(range_end) = range_end { - request = request.with_range_end(range_end.as_bytes()); + options = 
options.with_range_end(range_end.as_bytes()); } if prefix { - request = request.with_prefix(); + options = options.with_prefix(); } - request = request.with_prev_kv(prev_kv); + options = options.with_prev_kv(prev_kv); if from_key { - request = request.with_from_key(); + options = options.with_from_key(); } - request + (key.to_owned(), options) } /// Execute the command pub(crate) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { let req = build_request(matches); - let resp = client.kv_client().delete(req).await?; + let resp = client.kv_client().delete(req.0, Some(req.1)).await?; resp.print(); Ok(()) @@ -68,23 +71,29 @@ mod tests { let test_cases = vec![ TestCase::new( vec!["delete", "key1"], - Some(DeleteRangeRequest::new("key1".as_bytes())), + Some(("key1".into(), DeleteRangeOptions::default())), ), TestCase::new( vec!["delete", "key2", "end2"], - Some(DeleteRangeRequest::new("key2".as_bytes()).with_range_end("end2".as_bytes())), + Some(( + "key2".into(), + DeleteRangeOptions::default().with_range_end("end2".as_bytes()), + )), ), TestCase::new( vec!["delete", "key3", "--prefix"], - Some(DeleteRangeRequest::new("key3".as_bytes()).with_prefix()), + Some(("key3".into(), DeleteRangeOptions::default().with_prefix())), ), TestCase::new( vec!["delete", "key4", "--prev_kv"], - Some(DeleteRangeRequest::new("key4".as_bytes()).with_prev_kv(true)), + Some(( + "key4".into(), + DeleteRangeOptions::default().with_prev_kv(true), + )), ), TestCase::new( vec!["delete", "key5", "--from_key"], - Some(DeleteRangeRequest::new("key5".as_bytes()).with_from_key()), + Some(("key5".into(), DeleteRangeOptions::default().with_from_key())), ), ]; diff --git a/crates/xlinectl/src/command/txn.rs b/crates/xlinectl/src/command/txn.rs index cb4a311be..664260832 100644 --- a/crates/xlinectl/src/command/txn.rs +++ b/crates/xlinectl/src/command/txn.rs @@ -150,7 +150,7 @@ fn parse_op_line(line: &str) -> Result { "delete" => { let matches = 
delete_cmd.try_get_matches_from(args.clone())?; let req = delete::build_request(&matches); - Ok(TxnOp::delete(req)) + Ok(TxnOp::delete(req.0, Some(req.1))) } _ => Err(anyhow!(format!("parse op failed in: `{line}`"))), } From a99b1efddd6fc1e31f8e59123a3b648dbb6488a1 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Mon, 22 Jul 2024 15:58:18 +0800 Subject: [PATCH 023/322] refactor(client)!: refactor KvClient::compact Signed-off-by: lxl66566 --- crates/simulation/src/xline_group.rs | 40 ++++++++--------- crates/simulation/tests/it/xline.rs | 5 +-- crates/xline-client/examples/kv.rs | 7 +-- crates/xline-client/src/clients/kv.rs | 22 ++++++---- crates/xline-client/src/types/kv.rs | 52 ----------------------- crates/xline-client/tests/it/kv.rs | 5 +-- crates/xlinectl/src/command/compaction.rs | 22 ++++------ 7 files changed, 44 insertions(+), 109 deletions(-) diff --git a/crates/simulation/src/xline_group.rs b/crates/simulation/src/xline_group.rs index 262642b72..bd6e7ae8b 100644 --- a/crates/simulation/src/xline_group.rs +++ b/crates/simulation/src/xline_group.rs @@ -13,10 +13,7 @@ use xline_client::{ error::XlineClientError, types::{ cluster::{MemberAddRequest, MemberAddResponse, MemberListRequest, MemberListResponse}, - kv::{ - CompactionRequest, CompactionResponse, PutOptions, PutResponse, RangeOptions, - RangeResponse, - }, + kv::{CompactionResponse, PutOptions, PutResponse, RangeOptions, RangeResponse}, watch::{WatchOptions, WatchStreaming, Watcher}, }, Client, ClientOptions, @@ -159,21 +156,6 @@ pub struct SimClient { handle: NodeHandle, } -macro_rules! 
impl_client_method { - ($method:ident, $client:ident, $request:ty, $response:ty) => { - pub async fn $method( - &self, - request: $request, - ) -> Result<$response, XlineClientError> { - let client = self.inner.clone(); - self.handle - .spawn(async move { client.$client().$method(request).await }) - .await - .unwrap() - } - }; -} - impl SimClient { pub async fn put( &self, @@ -189,6 +171,7 @@ impl SimClient { .await .unwrap() } + pub async fn range( &self, key: impl Into>, @@ -201,7 +184,19 @@ impl SimClient { .await .unwrap() } - impl_client_method!(compact, kv_client, CompactionRequest, CompactionResponse); + + pub async fn compact( + &self, + revision: i64, + physical: bool, + ) -> Result> { + let client = self.inner.clone(); + self.handle + .spawn(async move { client.kv_client().compact(revision, physical).await }) + .await + .unwrap() + } + pub async fn watch( &self, key: impl Into>, @@ -284,13 +279,14 @@ impl SimEtcdClient { pub async fn compact( &self, - request: CompactionRequest, + revision: i64, + physical: bool, ) -> Result> { let mut client = self.kv.clone(); self.handle .spawn(async move { client - .compact(xlineapi::CompactionRequest::from(request)) + .compact(xlineapi::CompactionRequest { revision, physical }) .await .map(|r| r.into_inner()) .map_err(Into::into) diff --git a/crates/simulation/tests/it/xline.rs b/crates/simulation/tests/it/xline.rs index 4d0423603..8bab83021 100644 --- a/crates/simulation/tests/it/xline.rs +++ b/crates/simulation/tests/it/xline.rs @@ -5,7 +5,6 @@ use madsim::time::sleep; use simulation::xline_group::{SimEtcdClient, XlineGroup}; use xline_client::types::{ cluster::{MemberAddRequest, MemberListRequest}, - kv::CompactionRequest, watch::WatchOptions, }; @@ -33,9 +32,7 @@ async fn watch_compacted_revision_should_receive_canceled_response() { assert!(result.is_ok()); } - let result = client - .compact(CompactionRequest::new(5).with_physical()) - .await; + let result = client.compact(5, true).await; assert!(result.is_ok()); 
let (_, mut watch_stream) = client diff --git a/crates/xline-client/examples/kv.rs b/crates/xline-client/examples/kv.rs index 6cd6f5d06..0373f74e2 100644 --- a/crates/xline-client/examples/kv.rs +++ b/crates/xline-client/examples/kv.rs @@ -1,9 +1,6 @@ use anyhow::Result; use xline_client::{ - types::kv::{ - CompactionRequest, Compare, CompareResult, DeleteRangeOptions, PutOptions, TxnOp, - TxnRequest, - }, + types::kv::{Compare, CompareResult, DeleteRangeOptions, PutOptions, TxnOp, TxnRequest}, Client, ClientOptions, }; @@ -72,7 +69,7 @@ async fn main() -> Result<()> { // compact let rev = resp.header.unwrap().revision; - let _resp = client.compact(CompactionRequest::new(rev)).await?; + let _resp = client.compact(rev, false).await?; Ok(()) } diff --git a/crates/xline-client/src/clients/kv.rs b/crates/xline-client/src/clients/kv.rs index 726e15bd5..c78e49cba 100644 --- a/crates/xline-client/src/clients/kv.rs +++ b/crates/xline-client/src/clients/kv.rs @@ -8,7 +8,7 @@ use xlineapi::{ use crate::{ error::Result, - types::kv::{CompactionRequest, DeleteRangeOptions, PutOptions, RangeOptions, TxnRequest}, + types::kv::{DeleteRangeOptions, PutOptions, RangeOptions, TxnRequest}, AuthService, CurpClient, }; @@ -252,6 +252,11 @@ impl KvClient { /// We compact at revision 3. After the compaction, the revision list will become [(A, 3), (A, 4), (A, 5)]. /// All revisions less than 3 are deleted. The latest revision, 3, will be kept. /// + /// `Revision` is the key-value store revision for the compaction operation. + /// `Physical` is set so the RPC will wait until the compaction is physically + /// applied to the local database such that compacted entries are totally + /// removed from the backend database. 
+ /// /// # Errors /// /// This function will return an error if the inner CURP client encountered a propose failure @@ -260,8 +265,7 @@ impl KvClient { /// ///```no_run /// use xline_client::{ - /// types::kv::{CompactionRequest}, - /// Client, ClientOptions, + /// Client, ClientOptions /// }; /// use anyhow::Result; /// @@ -276,23 +280,23 @@ impl KvClient { /// let resp_put = client.put("key", "val", None).await?; /// let rev = resp_put.header.unwrap().revision; /// - /// let _resp = client.compact(CompactionRequest::new(rev)).await?; + /// let _resp = client.compact(rev, false).await?; /// /// Ok(()) /// } /// ``` #[inline] - pub async fn compact(&self, request: CompactionRequest) -> Result { - if request.physical() { + pub async fn compact(&self, revision: i64, physical: bool) -> Result { + let request = xlineapi::CompactionRequest { revision, physical }; + if physical { let mut kv_client = self.kv_client.clone(); return kv_client - .compact(xlineapi::CompactionRequest::from(request)) + .compact(request) .await .map(tonic::Response::into_inner) .map_err(Into::into); } - let request = RequestWrapper::from(xlineapi::CompactionRequest::from(request)); - let cmd = Command::new(request); + let cmd = Command::new(RequestWrapper::from(request)); let (cmd_res, _sync_res) = self .curp_client .propose(&cmd, self.token.as_ref(), true) diff --git a/crates/xline-client/src/types/kv.rs b/crates/xline-client/src/types/kv.rs index ec50bd973..fe23e0ebb 100644 --- a/crates/xline-client/src/types/kv.rs +++ b/crates/xline-client/src/types/kv.rs @@ -680,55 +680,3 @@ impl From for xlineapi::TxnRequest { txn.inner } } - -/// Compaction Request compacts the key-value store up to a given revision. -/// All keys with revisions less than the given revision will be compacted. -/// The compaction process will remove all historical versions of these keys, except for the most recent one. -/// For example, here is a revision list: [(A, 1), (A, 2), (A, 3), (A, 4), (A, 5)]. 
-/// We compact at revision 3. After the compaction, the revision list will become [(A, 3), (A, 4), (A, 5)]. -/// All revisions less than 3 are deleted. The latest revision, 3, will be kept. -#[derive(Debug, PartialEq)] -pub struct CompactionRequest { - /// The inner request - inner: xlineapi::CompactionRequest, -} - -impl CompactionRequest { - /// Creates a new `CompactionRequest` - /// - /// `Revision` is the key-value store revision for the compaction operation. - #[inline] - #[must_use] - pub fn new(revision: i64) -> Self { - Self { - inner: xlineapi::CompactionRequest { - revision, - ..Default::default() - }, - } - } - - /// Physical is set so the RPC will wait until the compaction is physically - /// applied to the local database such that compacted entries are totally - /// removed from the backend database. - #[inline] - #[must_use] - pub fn with_physical(mut self) -> Self { - self.inner.physical = true; - self - } - - /// Get `physical` - #[inline] - #[must_use] - pub fn physical(&self) -> bool { - self.inner.physical - } -} - -impl From for xlineapi::CompactionRequest { - #[inline] - fn from(req: CompactionRequest) -> Self { - req.inner - } -} diff --git a/crates/xline-client/tests/it/kv.rs b/crates/xline-client/tests/it/kv.rs index 588736e3e..e254adfd4 100644 --- a/crates/xline-client/tests/it/kv.rs +++ b/crates/xline-client/tests/it/kv.rs @@ -4,8 +4,7 @@ use test_macros::abort_on_panic; use xline_client::{ error::Result, types::kv::{ - CompactionRequest, Compare, CompareResult, DeleteRangeOptions, PutOptions, RangeOptions, - TxnOp, TxnRequest, + Compare, CompareResult, DeleteRangeOptions, PutOptions, RangeOptions, TxnOp, TxnRequest, }, }; @@ -272,7 +271,7 @@ async fn compact_should_remove_previous_revision() -> Result<()> { .await?; assert_eq!(rev1_resp.kvs[0].value, b"1"); - client.compact(CompactionRequest::new(3)).await?; + client.compact(3, false).await?; // after compacting let rev0_resp = client diff --git 
a/crates/xlinectl/src/command/compaction.rs b/crates/xlinectl/src/command/compaction.rs index 64a201973..274b92d3e 100644 --- a/crates/xlinectl/src/command/compaction.rs +++ b/crates/xlinectl/src/command/compaction.rs @@ -1,9 +1,12 @@ use anyhow::Result; use clap::{arg, value_parser, ArgMatches, Command}; -use xline_client::{types::kv::CompactionRequest, Client}; +use xline_client::Client; use crate::utils::printer::Printer; +/// Temp type for build a compaction request, indicates `(revision, physical)` +type CompactionRequest = (i64, bool); + /// Definition of `compaction` command pub(crate) fn command() -> Command { Command::new("compaction") @@ -17,19 +20,13 @@ pub(crate) fn build_request(matches: &ArgMatches) -> CompactionRequest { let revision = matches.get_one::("revision").expect("required"); let physical = matches.get_flag("physical"); - let mut request = CompactionRequest::new(*revision); - - if physical { - request = request.with_physical(); - } - - request + (*revision, physical) } /// Execute the command pub(crate) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { let req = build_request(matches); - let resp = client.kv_client().compact(req).await?; + let resp = client.kv_client().compact(req.0, req.1).await?; resp.print(); Ok(()) @@ -45,11 +42,8 @@ mod tests { #[test] fn command_parse_should_be_valid() { let test_cases = vec![ - TestCase::new(vec!["compaction", "123"], Some(CompactionRequest::new(123))), - TestCase::new( - vec!["compaction", "123", "--physical"], - Some(CompactionRequest::new(123).with_physical()), - ), + TestCase::new(vec!["compaction", "123"], Some((123, false))), + TestCase::new(vec!["compaction", "123", "--physical"], Some((123, true))), ]; for case in test_cases { From 0e6f32c94ea82cbf2aae803e31f0d17f4c7a4f13 Mon Sep 17 00:00:00 2001 From: Yao Junwen <1440781054@qq.com> Date: Wed, 31 Jul 2024 18:36:49 +0800 Subject: [PATCH 024/322] chore: upgrade axum to 0.7 Upgrade axum to 0.7, tonic to 0.12, hyper to 
1.0, opentelemetry to 0.24, opentelemetry_sdk to 0.24.1, opentelemetry_otlp to 0.17. Signed-off-by: Yao Junwen <1440781054@qq.com> --- Cargo.lock | 457 +++++++------------ Cargo.toml | 9 +- crates/benchmark/Cargo.toml | 2 +- crates/curp-external-api/Cargo.toml | 2 +- crates/curp-test-utils/Cargo.toml | 4 +- crates/curp/Cargo.toml | 12 +- crates/curp/src/rpc/mod.rs | 5 - crates/curp/src/server/curp_node.rs | 5 +- crates/curp/src/server/mod.rs | 2 +- crates/engine/Cargo.toml | 2 +- crates/simulation/Cargo.toml | 6 +- crates/utils/Cargo.toml | 16 +- crates/utils/src/tracing.rs | 11 +- crates/xline-client/Cargo.toml | 8 +- crates/xline-client/src/types/cluster.rs | 6 +- crates/xline-test-utils/Cargo.toml | 3 +- crates/xline/Cargo.toml | 29 +- crates/xline/src/server/maintenance.rs | 2 +- crates/xline/src/server/xline_server.rs | 2 +- crates/xline/src/storage/alarm_store.rs | 10 +- crates/xline/src/storage/auth_store/store.rs | 26 +- crates/xline/src/storage/lease_store/mod.rs | 8 +- crates/xline/src/utils/metrics.rs | 9 +- crates/xline/src/utils/trace.rs | 9 +- crates/xlineapi/Cargo.toml | 6 +- crates/xlinectl/Cargo.toml | 2 +- workspace-hack/Cargo.toml | 20 +- 27 files changed, 279 insertions(+), 394 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6ee8ac378..ae693d84a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -226,6 +226,12 @@ dependencies = [ "syn 2.0.63", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.3.0" @@ -234,18 +240,19 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "axum" -version = "0.6.20" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf" +checksum = 
"3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" dependencies = [ "async-trait", "axum-core", - "bitflags 1.3.2", "bytes", "futures-util", "http", "http-body", + "http-body-util", "hyper", + "hyper-util", "itoa", "matchit", "memchr", @@ -257,28 +264,33 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 1.0.1", "tokio", "tower", "tower-layer", "tower-service", + "tracing", ] [[package]] name = "axum-core" -version = "0.3.4" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" +checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3" dependencies = [ "async-trait", "bytes", "futures-util", "http", "http-body", + "http-body-util", "mime", + "pin-project-lite", "rustversion", + "sync_wrapper 0.1.2", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -360,7 +372,7 @@ version = "0.69.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" dependencies = [ - "bitflags 2.5.0", + "bitflags", "cexpr", "clang-sys", "itertools 0.12.1", @@ -374,12 +386,6 @@ dependencies = [ "syn 2.0.63", ] -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bitflags" version = "2.5.0" @@ -412,12 +418,6 @@ version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "bytes" version = "1.6.0" @@ -573,16 +573,6 @@ 
version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" -[[package]] -name = "core-foundation" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "core-foundation-sys" version = "0.8.6" @@ -678,7 +668,7 @@ dependencies = [ "madsim-tonic-build", "mockall", "once_cell", - "opentelemetry 0.21.0", + "opentelemetry", "parking_lot", "priority-queue", "prost", @@ -912,15 +902,6 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" -[[package]] -name = "encoding_rs" -version = "0.8.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" -dependencies = [ - "cfg-if", -] - [[package]] name = "engine" version = "0.1.0" @@ -930,7 +911,7 @@ dependencies = [ "bytes", "clippy-utilities", "madsim-tokio", - "opentelemetry 0.21.0", + "opentelemetry", "parking_lot", "rocksdb", "serde", @@ -961,9 +942,9 @@ dependencies = [ [[package]] name = "etcd-client" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b915bb9b1e143ab7062e0067ed663e3dfeffc69ce0ceb9e93b35fecfc158d28" +checksum = "39bde3ce50a626efeb1caa9ab1083972d178bebb55ca627639c8ded507dfcbde" dependencies = [ "http", "prost", @@ -1189,15 +1170,15 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.26" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +checksum = 
"fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" dependencies = [ + "atomic-waker", "bytes", "fnv", "futures-core", "futures-sink", - "futures-util", "http", "indexmap 2.2.6", "slab", @@ -1247,9 +1228,9 @@ dependencies = [ [[package]] name = "http" -version = "0.2.12" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" dependencies = [ "bytes", "fnv", @@ -1258,12 +1239,24 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.6" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http", + "http-body", "pin-project-lite", ] @@ -1281,13 +1274,12 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "0.14.28" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" +checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" dependencies = [ "bytes", "futures-channel", - "futures-core", "futures-util", "h2", "http", @@ -1296,23 +1288,42 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2", + "smallvec", "tokio", - "tower-service", - "tracing", "want", ] [[package]] name = "hyper-timeout" -version = "0.4.1" +version = "0.5.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" +checksum = "3ab92f4f49ee4fb4f997c784b7a2e0fa70050211e0b6a287f898c3c9785ca956" dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", "hyper", "pin-project-lite", + "socket2", "tokio", - "tokio-io-timeout", + "tower", + "tower-service", + "tracing", ] [[package]] @@ -1396,12 +1407,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - [[package]] name = "ipnet" version = "2.9.0" @@ -1560,8 +1565,8 @@ dependencies = [ [[package]] name = "madsim" -version = "0.2.27" -source = "git+https://github.com/Phoenix500526/madsim.git?branch=update-tonic#4df254ae43fe7921a8403873460005379ccb8247" +version = "0.2.30" +source = "git+https://github.com/LucienY01/madsim.git?branch=bz/tonic-0-12#a7d205e8f044876105cb8980c1c5b5231dd9a170" dependencies = [ "ahash", "async-channel", @@ -1591,7 +1596,7 @@ dependencies = [ [[package]] name = "madsim-macros" version = "0.2.12" -source = "git+https://github.com/Phoenix500526/madsim.git?branch=update-tonic#4df254ae43fe7921a8403873460005379ccb8247" +source = "git+https://github.com/LucienY01/madsim.git?branch=bz/tonic-0-12#a7d205e8f044876105cb8980c1c5b5231dd9a170" dependencies = [ "darling 0.14.4", "proc-macro2", @@ -1601,8 +1606,8 @@ dependencies = [ [[package]] name = "madsim-tokio" -version = "0.2.25" -source = 
"git+https://github.com/Phoenix500526/madsim.git?branch=update-tonic#4df254ae43fe7921a8403873460005379ccb8247" +version = "0.2.28" +source = "git+https://github.com/LucienY01/madsim.git?branch=bz/tonic-0-12#a7d205e8f044876105cb8980c1c5b5231dd9a170" dependencies = [ "madsim", "spin", @@ -1611,8 +1616,8 @@ dependencies = [ [[package]] name = "madsim-tonic" -version = "0.4.2+0.11.0" -source = "git+https://github.com/Phoenix500526/madsim.git?branch=update-tonic#4df254ae43fe7921a8403873460005379ccb8247" +version = "0.5.0+0.12.0" +source = "git+https://github.com/LucienY01/madsim.git?branch=bz/tonic-0-12#a7d205e8f044876105cb8980c1c5b5231dd9a170" dependencies = [ "async-stream", "chrono", @@ -1626,8 +1631,8 @@ dependencies = [ [[package]] name = "madsim-tonic-build" -version = "0.4.3+0.11.0" -source = "git+https://github.com/Phoenix500526/madsim.git?branch=update-tonic#4df254ae43fe7921a8403873460005379ccb8247" +version = "0.5.0+0.12.0" +source = "git+https://github.com/LucienY01/madsim.git?branch=bz/tonic-0-12#a7d205e8f044876105cb8980c1c5b5231dd9a170" dependencies = [ "prettyplease", "proc-macro2", @@ -1753,7 +1758,7 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ - "bitflags 2.5.0", + "bitflags", "cfg-if", "cfg_aliases", "libc", @@ -1852,40 +1857,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "opentelemetry" -version = "0.21.0" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e32339a5dc40459130b3bd269e9892439f55b33e772d2a9d402a789baaf4e8a" -dependencies = [ - "futures-core", - "futures-sink", - "indexmap 2.2.6", - "js-sys", - "once_cell", - "pin-project-lite", - "thiserror", - "urlencoding", -] - -[[package]] -name = "opentelemetry" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"900d57987be3f2aeb70d385fff9b27fb74c5723cc9a52d904d4f9c807a0667bf" -dependencies = [ - "futures-core", - "futures-sink", - "js-sys", - "once_cell", - "pin-project-lite", - "thiserror", - "urlencoding", -] - -[[package]] -name = "opentelemetry" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b69a91d4893e713e06f724597ad630f1fa76057a5e1026c0ca67054a9032a76" +checksum = "4c365a63eec4f55b7efeceb724f1336f26a9cf3427b70e59e2cd2a5b947fba96" dependencies = [ "futures-core", "futures-sink", @@ -1897,63 +1871,56 @@ dependencies = [ [[package]] name = "opentelemetry-contrib" -version = "0.14.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d4c267ff82b3e9e9f548199267c3f722d9cffe3bfe4318b05fcf56fd5357aad" +checksum = "60741e61c3c2ae6000c7cbb0d8184d4c60571c65bf0af32b418152570c8cb110" dependencies = [ "async-trait", "futures-core", "futures-util", "once_cell", - "opentelemetry 0.22.0", - "opentelemetry-semantic-conventions 0.14.0", - "opentelemetry_sdk 0.22.1", + "opentelemetry", + "opentelemetry-semantic-conventions", + "opentelemetry_sdk", "serde_json", "tokio", ] [[package]] name = "opentelemetry-http" -version = "0.11.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7690dc77bf776713848c4faa6501157469017eaf332baccd4eb1cea928743d94" +checksum = "ad31e9de44ee3538fb9d64fe3376c1362f406162434609e79aea2a41a0af78ab" dependencies = [ "async-trait", "bytes", "http", - "opentelemetry 0.22.0", + "opentelemetry", "reqwest", ] [[package]] -name = "opentelemetry-jaeger" -version = "0.22.0" +name = "opentelemetry-jaeger-propagator" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "501b471b67b746d9a07d4c29f8be00f952d1a2eca356922ede0098cbaddff19f" +checksum = "fc0a68a13b92fc708d875ad659b08b35d08b8ef2403e01944b39ca21e5b08b17" dependencies = [ - "async-trait", - "futures-core", - "futures-util", - 
"opentelemetry 0.23.0", - "opentelemetry-semantic-conventions 0.15.0", - "opentelemetry_sdk 0.23.0", - "thrift", + "opentelemetry", ] [[package]] name = "opentelemetry-otlp" -version = "0.15.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a016b8d9495c639af2145ac22387dcb88e44118e45320d9238fbf4e7889abcb" +checksum = "6b925a602ffb916fb7421276b86756027b37ee708f9dce2dbdcc51739f07e727" dependencies = [ "async-trait", "futures-core", "http", - "opentelemetry 0.22.0", + "opentelemetry", "opentelemetry-http", "opentelemetry-proto", - "opentelemetry-semantic-conventions 0.14.0", - "opentelemetry_sdk 0.22.1", + "opentelemetry_sdk", "prost", "reqwest", "thiserror", @@ -1963,100 +1930,56 @@ dependencies = [ [[package]] name = "opentelemetry-prometheus" -version = "0.15.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30bbcf6341cab7e2193e5843f0ac36c446a5b3fccb28747afaeda17996dcd02e" +checksum = "cc4191ce34aa274621861a7a9d68dbcf618d5b6c66b10081631b61fd81fbc015" dependencies = [ "once_cell", - "opentelemetry 0.22.0", - "opentelemetry_sdk 0.22.1", + "opentelemetry", + "opentelemetry_sdk", "prometheus", "protobuf", ] [[package]] name = "opentelemetry-proto" -version = "0.5.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a8fddc9b68f5b80dae9d6f510b88e02396f006ad48cac349411fbecc80caae4" +checksum = "30ee9f20bff9c984511a02f082dc8ede839e4a9bf15cc2487c8d6fea5ad850d9" dependencies = [ - "opentelemetry 0.22.0", - "opentelemetry_sdk 0.22.1", + "opentelemetry", + "opentelemetry_sdk", "prost", "tonic", ] [[package]] name = "opentelemetry-semantic-conventions" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9ab5bd6c42fb9349dcf28af2ba9a0667f697f9bdcca045d39f2cec5543e2910" - -[[package]] -name = "opentelemetry-semantic-conventions" -version = "0.15.0" +version = "0.16.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1869fb4bb9b35c5ba8a1e40c9b128a7b4c010d07091e864a29da19e4fe2ca4d7" +checksum = "1cefe0543875379e47eb5f1e68ff83f45cc41366a92dfd0d073d513bf68e9a05" [[package]] name = "opentelemetry_sdk" -version = "0.22.1" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e90c7113be649e31e9a0f8b5ee24ed7a16923b322c3c5ab6367469c049d6b7e" +checksum = "692eac490ec80f24a17828d49b40b60f5aeaccdfe6a503f939713afd22bc28df" dependencies = [ "async-trait", - "crossbeam-channel", "futures-channel", "futures-executor", "futures-util", "glob", "once_cell", - "opentelemetry 0.22.0", - "ordered-float 4.2.0", + "opentelemetry", "percent-encoding", "rand", + "serde_json", "thiserror", "tokio", "tokio-stream 0.1.15", ] -[[package]] -name = "opentelemetry_sdk" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae312d58eaa90a82d2e627fd86e075cf5230b3f11794e2ed74199ebbe572d4fd" -dependencies = [ - "async-trait", - "futures-channel", - "futures-executor", - "futures-util", - "lazy_static", - "once_cell", - "opentelemetry 0.23.0", - "ordered-float 4.2.0", - "percent-encoding", - "rand", - "thiserror", -] - -[[package]] -name = "ordered-float" -version = "2.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] - -[[package]] -name = "ordered-float" -version = "4.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a76df7075c7d4d01fdcb46c912dd17fba5b60c78ea480b475f2b6ab6f666584e" -dependencies = [ - "num-traits", -] - [[package]] name = "overload" version = "0.1.1" @@ -2301,9 +2224,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.12.6" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +checksum = "e13db3d3fde688c61e2446b4d843bc27a7e8af269a69440c0308021dc92333cc" dependencies = [ "bytes", "prost-derive", @@ -2311,13 +2234,13 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.12.6" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" +checksum = "5bb182580f71dd070f88d01ce3de9f4da5021db7115d2e1c3605a754153b77c1" dependencies = [ "bytes", "heck", - "itertools 0.12.1", + "itertools 0.13.0", "log", "multimap", "once_cell", @@ -2332,12 +2255,12 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.12.6" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +checksum = "18bec9b0adc4eba778b33684b7ba3e7137789434769ee3ce3930463ef904cfca" dependencies = [ "anyhow", - "itertools 0.12.1", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.63", @@ -2345,9 +2268,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.12.6" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +checksum = "cee5168b05f49d4b0ca581206eb14a7b22fafd963efe729ac48eb03266e25cc2" dependencies = [ "prost", ] @@ -2412,7 +2335,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" dependencies = [ - "bitflags 2.5.0", + "bitflags", ] [[package]] @@ -2461,19 +2384,20 @@ checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "reqwest" -version = "0.11.27" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "bytes", - "encoding_rs", + "futures-channel", "futures-core", "futures-util", - "h2", "http", "http-body", + "http-body-util", "hyper", + "hyper-util", "ipnet", "js-sys", "log", @@ -2484,8 +2408,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", - "system-configuration", + "sync_wrapper 1.0.1", "tokio", "tower-service", "url", @@ -2547,7 +2470,7 @@ version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ - "bitflags 2.5.0", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -2556,11 +2479,12 @@ dependencies = [ [[package]] name = "rustls" -version = "0.22.4" +version = "0.23.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" +checksum = "ebbbdb961df0ad3f2652da8f3fdc4b36122f568f968f45ad3316f26c025c677b" dependencies = [ "log", + "once_cell", "ring", "rustls-pki-types", "rustls-webpki", @@ -2873,25 +2797,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" [[package]] -name = "system-configuration" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" -dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "system-configuration-sys", -] - -[[package]] -name = "system-configuration-sys" -version = "0.5.0" +name = "sync_wrapper" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" -dependencies = [ - 
"core-foundation-sys", - "libc", -] +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" [[package]] name = "tempfile" @@ -2952,28 +2861,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "threadpool" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" -dependencies = [ - "num_cpus", -] - -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "log", - "ordered-float 2.10.1", - "threadpool", -] - [[package]] name = "time" version = "0.3.36" @@ -3038,16 +2925,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "tokio-io-timeout" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b74022ada614a1b4834de765f9bb43877f910cc8ce4be40e89042c9223a8bf" -dependencies = [ - "pin-project-lite", - "tokio", -] - [[package]] name = "tokio-macros" version = "2.3.0" @@ -3061,9 +2938,9 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.25.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ "rustls", "rustls-pki-types", @@ -3140,25 +3017,27 @@ dependencies = [ [[package]] name = "tonic" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76c4eb7a4e9ef9d4763600161f12f5070b92a578e1b634db88a6887844c91a13" +checksum = "38659f4a91aba8598d27821589f5db7dddd94601e7a01b1e485a50e5484c7401" dependencies = [ "async-stream", "async-trait", "axum", - "base64 0.21.7", + "base64 0.22.1", "bytes", "h2", "http", 
"http-body", + "http-body-util", "hyper", "hyper-timeout", + "hyper-util", "percent-encoding", "pin-project", "prost", "rustls-pemfile", - "rustls-pki-types", + "socket2", "tokio", "tokio-rustls", "tokio-stream 0.1.15", @@ -3170,9 +3049,9 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4ef6dd70a610078cb4e338a0f79d06bc759ff1b22d2120c2ff02ae264ba9c2" +checksum = "568392c5a2bd0020723e3f387891176aabafe36fd9fcd074ad309dfa0c8eb964" dependencies = [ "prettyplease", "proc-macro2", @@ -3183,9 +3062,9 @@ dependencies = [ [[package]] name = "tonic-health" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cef6e24bc96871001a7e48e820ab240b3de2201e59b517cf52835df2f1d2350" +checksum = "e1e10e6a96ee08b6ce443487d4368442d328d0e746f3681f81127f7dc41b4955" dependencies = [ "async-stream", "prost", @@ -3284,14 +3163,14 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9be14ba1bbe4ab79e9229f7f89fab8d120b865859f10527f31c033e599d2284" +checksum = "a9784ed4da7d921bc8df6963f8c80a0e4ce34ba6ba76668acadd3edbd985ff3b" dependencies = [ "js-sys", "once_cell", - "opentelemetry 0.22.0", - "opentelemetry_sdk 0.22.1", + "opentelemetry", + "opentelemetry_sdk", "smallvec", "tracing", "tracing-core", @@ -3398,12 +3277,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "urlencoding" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" - [[package]] name = "utf8parse" version = "0.2.1" @@ -3423,10 +3296,10 @@ dependencies = [ "getset", "madsim-tokio", "madsim-tonic", - "opentelemetry 0.22.0", - "opentelemetry-jaeger", + "opentelemetry", + 
"opentelemetry-jaeger-propagator", "opentelemetry-otlp", - "opentelemetry_sdk 0.22.1", + "opentelemetry_sdk", "parking_lot", "pbkdf2", "petgraph", @@ -3761,9 +3634,9 @@ dependencies = [ [[package]] name = "winreg" -version = "0.50.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" dependencies = [ "cfg-if", "windows-sys 0.48.0", @@ -3774,33 +3647,34 @@ name = "workspace-hack" version = "0.1.0" dependencies = [ "axum", + "axum-core", "bytes", "cc", "clap", - "crossbeam-utils", "crypto-common", "digest", "either", "futures-channel", "futures-util", "getrandom", - "itertools 0.12.1", + "itertools 0.13.0", "libc", "log", "madsim-tokio", "madsim-tonic", "memchr", - "num-traits", - "opentelemetry_sdk 0.22.1", + "opentelemetry_sdk", "petgraph", "predicates", "serde", "serde_json", "sha2", + "smallvec", "syn 1.0.109", "syn 2.0.63", "time", "tokio", + "tokio-stream 0.1.15", "tokio-util", "tonic", "tower", @@ -3859,11 +3733,11 @@ dependencies = [ "merged_range", "mockall", "nix", - "opentelemetry 0.22.0", + "opentelemetry", "opentelemetry-contrib", "opentelemetry-otlp", "opentelemetry-prometheus", - "opentelemetry_sdk 0.22.1", + "opentelemetry_sdk", "parking_lot", "pbkdf2", "priority-queue", @@ -3875,6 +3749,7 @@ dependencies = [ "strum", "strum_macros", "test-macros", + "tokio", "tokio-stream 0.1.12", "tokio-util", "toml", @@ -3906,6 +3781,8 @@ dependencies = [ "http", "madsim-tokio", "madsim-tonic", + "madsim-tonic-build", + "prost", "rand", "test-macros", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index 27e783b45..8afd49d17 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,8 +23,7 @@ ignored = ["prost", "workspace-hack"] [patch.crates-io] # This branch update the tonic version for madsim. We should switch to the original etcd-client crate when new version release. 
-madsim = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic" } -madsim-tonic = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic" } -madsim-tonic-build = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic" } -madsim-tokio = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic" } - +madsim = { git = "https://github.com/LucienY01/madsim.git", branch = "bz/tonic-0-12" } +madsim-tonic = { git = "https://github.com/LucienY01/madsim.git", branch = "bz/tonic-0-12" } +madsim-tonic-build = { git = "https://github.com/LucienY01/madsim.git", branch = "bz/tonic-0-12" } +madsim-tokio = { git = "https://github.com/LucienY01/madsim.git", branch = "bz/tonic-0-12" } diff --git a/crates/benchmark/Cargo.toml b/crates/benchmark/Cargo.toml index cc6a1c215..652e49932 100644 --- a/crates/benchmark/Cargo.toml +++ b/crates/benchmark/Cargo.toml @@ -16,7 +16,7 @@ repository = "https://github.com/xline-kv/Xline/tree/master/benchmark" anyhow = "1.0.83" clap = { version = "4", features = ["derive"] } clippy-utilities = "0.2.0" -etcd-client = { version = "0.13.0", features = ["tls"] } +etcd-client = { version = "0.14.0", features = ["tls"] } indicatif = "0.17.8" rand = "0.8.5" thiserror = "1.0.61" diff --git a/crates/curp-external-api/Cargo.toml b/crates/curp-external-api/Cargo.toml index 99d0b212d..c17124b52 100644 --- a/crates/curp-external-api/Cargo.toml +++ b/crates/curp-external-api/Cargo.toml @@ -14,7 +14,7 @@ keywords = ["API", "Curp"] async-trait = "0.1.80" engine = { path = "../engine" } mockall = "0.12.1" -prost = "0.12.3" +prost = "0.13" serde = { version = "1.0.204", features = ["derive", "rc"] } thiserror = "1.0.61" workspace-hack = { version = "0.1", path = "../../workspace-hack" } diff --git a/crates/curp-test-utils/Cargo.toml b/crates/curp-test-utils/Cargo.toml index 059239951..cfae0b69f 100644 --- a/crates/curp-test-utils/Cargo.toml +++ b/crates/curp-test-utils/Cargo.toml 
@@ -16,13 +16,13 @@ bincode = "1.3.3" curp-external-api = { path = "../curp-external-api" } engine = { path = "../engine" } itertools = "0.13" -prost = "0.12.3" +prost = "0.13" serde = { version = "1.0.204", features = ["derive", "rc"] } thiserror = "1.0.61" tokio = { version = "0.2.25", package = "madsim-tokio", features = [ "rt-multi-thread", ] } -tracing = { version = "0.1.34", features = ["std", "log", "attributes"] } +tracing = { version = "0.1.37", features = ["std", "log", "attributes"] } tracing-subscriber = { version = "0.3.16", features = ["env-filter", "time"] } utils = { path = "../utils", version = "0.1.0", features = ["parking_lot"] } workspace-hack = { version = "0.1", path = "../../workspace-hack" } diff --git a/crates/curp/Cargo.toml b/crates/curp/Cargo.toml index 25dc3dc29..7bde0dcc8 100644 --- a/crates/curp/Cargo.toml +++ b/crates/curp/Cargo.toml @@ -29,10 +29,10 @@ futures = "0.3.21" indexmap = "2.2.6" itertools = "0.13" madsim = { version = "0.2.27", features = ["rpc", "macros"] } -opentelemetry = { version = "0.21.0", features = ["metrics"] } +opentelemetry = { version = "0.24.0", features = ["metrics"] } parking_lot = "0.12.3" priority-queue = "2.0.2" -prost = "0.12.3" +prost = "0.13" rand = "0.8.5" serde = { version = "1.0.204", features = ["derive", "rc"] } sha2 = "0.10.8" @@ -44,9 +44,9 @@ tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "ab251ad" "net", ] } tokio-util = "0.7.11" -tonic = { version = "0.4.2", package = "madsim-tonic", features = ["tls"] } +tonic = { version = "0.5.0", package = "madsim-tonic", features = ["tls"] } tower = { version = "0.4.13", features = ["filter"] } -tracing = { version = "0.1.34", features = ["std", "log", "attributes"] } +tracing = { version = "0.1.37", features = ["std", "log", "attributes"] } utils = { path = "../utils", version = "0.1.0", features = ["parking_lot"] } workspace-hack = { version = "0.1", path = "../../workspace-hack" } @@ -62,8 +62,8 @@ tracing-subscriber = { 
version = "0.3.16", features = ["env-filter", "time"] } tracing-test = "0.2.4" [build-dependencies] -prost-build = "0.12.6" -tonic-build = { version = "0.4.3", package = "madsim-tonic-build" } +prost-build = "0.13.0" +tonic-build = { version = "0.5.0", package = "madsim-tonic-build" } [features] client-metrics = [] diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 657789407..560401961 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -173,7 +173,6 @@ impl ProposeRequest { #[must_use] pub fn propose_id(&self) -> ProposeId { self.propose_id - .clone() .unwrap_or_else(|| unreachable!("propose id must be set in ProposeRequest")) .into() } @@ -236,7 +235,6 @@ impl WaitSyncedRequest { /// Get the `propose_id` reference pub(crate) fn propose_id(&self) -> ProposeId { self.propose_id - .clone() .unwrap_or_else(|| { unreachable!("propose id should be set in propose wait synced request") }) @@ -577,7 +575,6 @@ impl ProposeConfChangeRequest { /// Get id of the request pub(crate) fn propose_id(&self) -> ProposeId { self.propose_id - .clone() .unwrap_or_else(|| { unreachable!("propose id should be set in propose conf change request") }) @@ -597,7 +594,6 @@ impl ShutdownRequest { /// Get id of the request pub(crate) fn propose_id(&self) -> ProposeId { self.propose_id - .clone() .unwrap_or_else(|| { unreachable!("propose id should be set in propose conf change request") }) @@ -634,7 +630,6 @@ impl PublishRequest { /// Get id of the request pub(crate) fn propose_id(&self) -> ProposeId { self.propose_id - .clone() .unwrap_or_else(|| { unreachable!("propose id should be set in propose conf change request") }) diff --git a/crates/curp/src/server/curp_node.rs b/crates/curp/src/server/curp_node.rs index f0a4e5857..760e6e23e 100644 --- a/crates/curp/src/server/curp_node.rs +++ b/crates/curp/src/server/curp_node.rs @@ -208,10 +208,7 @@ impl CurpNode { } /// Handle `TriggerShutdown` requests - pub(super) fn trigger_shutdown( - &self, - 
_req: &TriggerShutdownRequest, - ) -> TriggerShutdownResponse { + pub(super) fn trigger_shutdown(&self, _req: TriggerShutdownRequest) -> TriggerShutdownResponse { self.curp.task_manager().mark_leader_notified(); TriggerShutdownResponse::default() } diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 29f7b7f84..33bf76b80 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -204,7 +204,7 @@ impl crate::rpc::InnerProtocol for Rpc { request: tonic::Request, ) -> Result, tonic::Status> { Ok(tonic::Response::new( - self.inner.trigger_shutdown(request.get_ref()), + self.inner.trigger_shutdown(*request.get_ref()), )) } diff --git a/crates/engine/Cargo.toml b/crates/engine/Cargo.toml index d0dda117a..f0f80a9e2 100644 --- a/crates/engine/Cargo.toml +++ b/crates/engine/Cargo.toml @@ -16,7 +16,7 @@ async-trait = "0.1.80" bincode = "1.3.3" bytes = "1.4.0" clippy-utilities = "0.2.0" -opentelemetry = { version = "0.21.0", features = ["metrics"] } +opentelemetry = { version = "0.24.0", features = ["metrics"] } parking_lot = "0.12.3" rocksdb = { version = "0.22.0", features = ["multi-threaded-cf"] } serde = { version = "1.0.204", features = ["derive"] } diff --git a/crates/simulation/Cargo.toml b/crates/simulation/Cargo.toml index 56b1377f2..e1afc93e7 100644 --- a/crates/simulation/Cargo.toml +++ b/crates/simulation/Cargo.toml @@ -20,7 +20,7 @@ futures = "0.3.29" itertools = "0.13" madsim = "0.2.27" parking_lot = "0.12.3" -prost = "0.12.3" +prost = "0.13" tempfile = "3" tokio = { version = "0.2.25", package = "madsim-tokio", features = [ "rt", @@ -31,7 +31,7 @@ tokio = { version = "0.2.25", package = "madsim-tokio", features = [ "time", "signal", ] } -tonic = { version = "0.4.2", package = "madsim-tonic" } +tonic = { version = "0.5.0", package = "madsim-tonic" } tracing = { version = "0.1.34", features = ["std", "log", "attributes"] } utils = { path = "../utils", version = "0.1.0", features = ["parking_lot"] } workspace-hack 
= { version = "0.1", path = "../../workspace-hack" } @@ -40,4 +40,4 @@ xline-client = { path = "../xline-client" } xlineapi = { path = "../xlineapi" } [build-dependencies] -tonic-build = { version = "0.4.3", package = "madsim-tonic-build" } +tonic-build = { version = "0.5.0", package = "madsim-tonic-build" } diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index 23b3fb606..57f6c7b5d 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -25,8 +25,8 @@ derive_builder = "0.20.0" event-listener = "5.3.1" futures = "0.3.30" getset = "0.1" -opentelemetry = { version = "0.22.0", features = ["trace"] } -opentelemetry_sdk = { version = "0.22.1", features = ["trace"] } +opentelemetry = { version = "0.24.0", features = ["trace"] } +opentelemetry_sdk = { version = "0.24.1", features = ["trace"] } parking_lot = { version = "0.12.3", optional = true } pbkdf2 = { version = "0.12.2", features = ["simple"] } petgraph = "0.6.4" @@ -40,19 +40,19 @@ tokio = { version = "0.2.25", package = "madsim-tokio", features = [ "rt-multi-thread", ] } toml = "0.8.14" -tonic = { version = "0.4.2", package = "madsim-tonic" } +tonic = { version = "0.5.0", package = "madsim-tonic" } tracing = "0.1.37" tracing-appender = "0.2" -tracing-opentelemetry = "0.23.0" +tracing-opentelemetry = "0.25.0" workspace-hack = { version = "0.1", path = "../../workspace-hack" } [dev-dependencies] -opentelemetry = { version = "0.22.0", features = ["trace"] } -opentelemetry-jaeger = "0.22.0" -opentelemetry-otlp = { version = "0.15.0", features = [ +opentelemetry = { version = "0.24.0", features = ["trace"] } +opentelemetry-jaeger-propagator = "0.3.0" +opentelemetry-otlp = { version = "0.17.0", features = [ "metrics", "http-proto", "reqwest-client", ] } test-macros = { path = "../test-macros" } -tracing-subscriber = "0.3.16" +tracing-subscriber = "0.3.18" diff --git a/crates/utils/src/tracing.rs b/crates/utils/src/tracing.rs index 163fc895a..36f2c7d28 100644 --- a/crates/utils/src/tracing.rs 
+++ b/crates/utils/src/tracing.rs @@ -81,6 +81,7 @@ impl Inject for tonic::metadata::MetadataMap { #[cfg(test)] mod test { + use opentelemetry::trace::TracerProvider as _; use opentelemetry::trace::{TraceContextExt, TraceId}; use opentelemetry_sdk::propagation::TraceContextPropagator; use tracing::info_span; @@ -89,7 +90,7 @@ mod test { }; use super::*; - #[tokio::test] + #[tokio::test(flavor = "multi_thread")] async fn test_inject_and_extract() -> Result<(), Box> { init()?; global::set_text_map_propagator(TraceContextPropagator::new()); @@ -113,11 +114,13 @@ mod test { /// init tracing subscriber fn init() -> Result<(), Box> { let otlp_exporter = opentelemetry_otlp::new_exporter().tonic(); - let jaeger_online_layer = opentelemetry_otlp::new_pipeline() + let provider = opentelemetry_otlp::new_pipeline() .tracing() .with_exporter(otlp_exporter) - .install_simple() - .map(|tracer| tracing_opentelemetry::layer().with_tracer(tracer))?; + .install_simple()?; + global::set_tracer_provider(provider.clone()); + let tracer = provider.tracer("xline"); + let jaeger_online_layer = tracing_opentelemetry::layer().with_tracer(tracer); tracing_subscriber::registry() .with(jaeger_online_layer) .init(); diff --git a/crates/xline-client/Cargo.toml b/crates/xline-client/Cargo.toml index 72c591457..556c89deb 100644 --- a/crates/xline-client/Cargo.toml +++ b/crates/xline-client/Cargo.toml @@ -18,10 +18,11 @@ clippy-utilities = "0.2.0" curp = { path = "../curp" } futures = "0.3.25" getrandom = "0.2" -http = "0.2.9" +http = "1.0" +prost = "0.13" thiserror = "1.0.61" tokio = { version = "0.2.25", package = "madsim-tokio", features = ["sync"] } -tonic = { version = "0.4.2", package = "madsim-tonic" } +tonic = { version = "0.5.0", package = "madsim-tonic" } tower = { version = "0.4", features = ["discover"] } utils = { path = "../utils", features = ["parking_lot"] } workspace-hack = { version = "0.1", path = "../../workspace-hack" } @@ -31,3 +32,6 @@ xlineapi = { path = "../xlineapi" } rand 
= "0.8.5" test-macros = { path = "../test-macros" } xline-test-utils = { path = "../xline-test-utils" } + +[build-dependencies] +tonic-build = { version = "0.5.0", package = "madsim-tonic-build" } diff --git a/crates/xline-client/src/types/cluster.rs b/crates/xline-client/src/types/cluster.rs index 3803e8d49..7a3a36f27 100644 --- a/crates/xline-client/src/types/cluster.rs +++ b/crates/xline-client/src/types/cluster.rs @@ -31,7 +31,7 @@ impl From for xlineapi::MemberAddRequest { } /// Request for `MemberList` -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone, Copy)] pub struct MemberListRequest { /// The inner request inner: xlineapi::MemberListRequest, @@ -56,7 +56,7 @@ impl From for xlineapi::MemberListRequest { } /// Request for `MemberPromote` -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone, Copy)] pub struct MemberPromoteRequest { /// The inner request inner: xlineapi::MemberPromoteRequest, @@ -81,7 +81,7 @@ impl From for xlineapi::MemberPromoteRequest { } /// Request for `MemberRemove` -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone, Copy)] pub struct MemberRemoveRequest { /// The inner request inner: xlineapi::MemberRemoveRequest, diff --git a/crates/xline-test-utils/Cargo.toml b/crates/xline-test-utils/Cargo.toml index 8c8c40e5d..e8a6ee5cf 100644 --- a/crates/xline-test-utils/Cargo.toml +++ b/crates/xline-test-utils/Cargo.toml @@ -21,8 +21,7 @@ tokio = { version = "0.2.25", package = "madsim-tokio", features = [ "net", "signal", ] } -# tonic = "0.11.0" -tonic = { version = "0.4.2", package = "madsim-tonic" } +tonic = { version = "0.5.0", package = "madsim-tonic" } utils = { path = "../utils", features = ["parking_lot"] } workspace-hack = { version = "0.1", path = "../../workspace-hack" } xline = { path = "../xline" } diff --git a/crates/xline/Cargo.toml b/crates/xline/Cargo.toml index 9c2c1b744..f6031c0af 100644 --- a/crates/xline/Cargo.toml +++ b/crates/xline/Cargo.toml @@ -15,7 +15,7 @@ categories = ["KV"] anyhow 
= "1.0.83" async-stream = "0.3.5" async-trait = "0.1.80" -axum = "0.6.20" +axum = "0.7.0" bytes = "1.4.0" clap = { version = "4", features = ["derive"] } clippy-utilities = "0.2.0" @@ -27,29 +27,31 @@ dashmap = "6.0.1" engine = { path = "../engine" } event-listener = "5.3.1" futures = "0.3.25" -hyper = "0.14.27" +hyper = "1.0.0" itertools = "0.13" jsonwebtoken = "9.3.0" log = "0.4.21" merged_range = "0.1.0" nix = "0.28.0" -opentelemetry = { version = "0.22.0", features = ["metrics"] } -opentelemetry-contrib = { version = "0.14.0", features = [ +opentelemetry = { version = "0.24.0", features = ["metrics"] } +opentelemetry-contrib = { version = "0.16.0", features = [ "jaeger_json_exporter", "rt-tokio", ] } -opentelemetry-otlp = { version = "0.15.0", features = [ +opentelemetry-otlp = { version = "0.17.0", features = [ + "grpc-tonic", "metrics", "http-proto", "reqwest-client", ] } -opentelemetry-prometheus = { version = "0.15.0" } -opentelemetry_sdk = { version = "0.22.1", features = ["metrics", "rt-tokio"] } +opentelemetry-prometheus = { version = "0.17.0" } +opentelemetry_sdk = { version = "0.24.1", features = ["metrics", "rt-tokio"] } parking_lot = "0.12.3" pbkdf2 = { version = "0.12.2", features = ["simple"] } priority-queue = "2.0.2" prometheus = "0.13.4" -prost = "0.12.3" +prost = "0.13.0" +real_tokio = { version = "1", package = "tokio" } serde = { version = "1.0.204", features = ["derive"] } sha2 = "0.10.6" tokio = { version = "0.2.25", package = "madsim-tokio", features = [ @@ -62,12 +64,11 @@ tokio = { version = "0.2.25", package = "madsim-tokio", features = [ tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "ab251ad" } tokio-util = { version = "0.7.11", features = ["io"] } toml = "0.8.14" -# tonic = "0.11.0" -tonic = { version = "0.4.2", package = "madsim-tonic" } -tonic-health = "0.11.0" +tonic = { version = "0.5.0", package = "madsim-tonic" } +tonic-health = "0.12.0" tracing = "0.1.37" tracing-appender = "0.2" -tracing-opentelemetry 
= "0.23.0" +tracing-opentelemetry = "0.25.0" tracing-subscriber = { version = "0.3.16", features = ["env-filter"] } utils = { path = "../utils", features = ["parking_lot"] } uuid = { version = "1.10.0", features = ["v4"] } @@ -76,10 +77,10 @@ x509-certificate = "0.23.1" xlineapi = { path = "../xlineapi" } [build-dependencies] -tonic-build = { version = "0.4.3", package = "madsim-tonic-build" } +tonic-build = { version = "0.5.0", package = "madsim-tonic-build" } [dev-dependencies] -etcd-client = { version = "0.13.0", features = ["tls"] } +etcd-client = { version = "0.14.0", features = ["tls"] } mockall = "0.12.1" rand = "0.8.5" strum = "0.26" diff --git a/crates/xline/src/server/maintenance.rs b/crates/xline/src/server/maintenance.rs index e8bc522c1..2ecd5943e 100644 --- a/crates/xline/src/server/maintenance.rs +++ b/crates/xline/src/server/maintenance.rs @@ -254,7 +254,7 @@ fn snapshot_stream( } checksum_gen.update(&buf); yield SnapshotResponse { - header: Some(header.clone()), + header: Some(header), remaining_bytes: remain_size, blob: Vec::from(buf) }; diff --git a/crates/xline/src/server/xline_server.rs b/crates/xline/src/server/xline_server.rs index fd3770e74..ed4978148 100644 --- a/crates/xline/src/server/xline_server.rs +++ b/crates/xline/src/server/xline_server.rs @@ -692,7 +692,7 @@ impl XlineServer { #[cfg(not(madsim))] fn bind_addrs( addrs: &[String], -) -> Result>> { +) -> Result>> { use std::net::ToSocketAddrs; if addrs.is_empty() { return Err(anyhow!("No address to bind")); diff --git a/crates/xline/src/storage/alarm_store.rs b/crates/xline/src/storage/alarm_store.rs index 98cdc6ad7..95a0e567b 100644 --- a/crates/xline/src/storage/alarm_store.rs +++ b/crates/xline/src/storage/alarm_store.rs @@ -160,10 +160,10 @@ impl AlarmStore { fn handle_alarm_get(&self, alarm: AlarmType) -> Vec { let types = self.types.read(); match alarm { - AlarmType::None => types.values().flat_map(HashMap::values).cloned().collect(), + AlarmType::None => 
types.values().flat_map(HashMap::values).copied().collect(), a @ (AlarmType::Nospace | AlarmType::Corrupt) => types .get(&a) - .map(|s| s.values().cloned().collect()) + .map(|s| s.values().copied().collect()) .unwrap_or_default(), } } @@ -175,7 +175,7 @@ impl AlarmStore { .read() .get(&alarm) .and_then(|e| e.get(&member_id)) - .map_or_else(|| vec![new_alarm], |m| vec![m.clone()]) + .map_or_else(|| vec![new_alarm], |m| vec![*m]) } /// Handle alarm deactivate request @@ -184,7 +184,7 @@ impl AlarmStore { .read() .get(&alarm) .and_then(|e| e.get(&member_id)) - .map(|m| vec![m.clone()]) + .map(|m| vec![*m]) .unwrap_or_default() } @@ -195,7 +195,7 @@ impl AlarmStore { let e = types_w.entry(alarm).or_default(); let mut ops = vec![]; if e.get(&member_id).is_none() { - _ = e.insert(new_alarm.member_id, new_alarm.clone()); + _ = e.insert(new_alarm.member_id, new_alarm); ops.push(WriteOp::PutAlarm(new_alarm)); } self.refresh_current_alarm(&types_w); diff --git a/crates/xline/src/storage/auth_store/store.rs b/crates/xline/src/storage/auth_store/store.rs index 771b7c8b6..b0f6c0e5e 100644 --- a/crates/xline/src/storage/auth_store/store.rs +++ b/crates/xline/src/storage/auth_store/store.rs @@ -193,13 +193,13 @@ impl AuthStore { ) -> Result { #[allow(clippy::wildcard_enum_match_arm)] let res = match *request { - RequestWrapper::AuthEnableRequest(ref req) => { + RequestWrapper::AuthEnableRequest(req) => { self.handle_auth_enable_request(req).map(Into::into) } - RequestWrapper::AuthDisableRequest(ref req) => { + RequestWrapper::AuthDisableRequest(req) => { Ok(self.handle_auth_disable_request(req).into()) } - RequestWrapper::AuthStatusRequest(ref req) => { + RequestWrapper::AuthStatusRequest(req) => { Ok(self.handle_auth_status_request(req).into()) } RequestWrapper::AuthUserAddRequest(ref req) => { @@ -208,7 +208,7 @@ impl AuthStore { RequestWrapper::AuthUserGetRequest(ref req) => { self.handle_user_get_request(req).map(Into::into) } - RequestWrapper::AuthUserListRequest(ref req) => 
{ + RequestWrapper::AuthUserListRequest(req) => { self.handle_user_list_request(req).map(Into::into) } RequestWrapper::AuthUserGrantRoleRequest(ref req) => { @@ -238,7 +238,7 @@ impl AuthStore { RequestWrapper::AuthRoleDeleteRequest(ref req) => { self.handle_role_delete_request(req).map(Into::into) } - RequestWrapper::AuthRoleListRequest(ref req) => { + RequestWrapper::AuthRoleListRequest(req) => { self.handle_role_list_request(req).map(Into::into) } RequestWrapper::AuthenticateRequest(ref req) => { @@ -254,7 +254,7 @@ impl AuthStore { /// Handle `AuthEnableRequest` fn handle_auth_enable_request( &self, - _req: &AuthEnableRequest, + _req: AuthEnableRequest, ) -> Result { debug!("handle_auth_enable"); let res = Ok(AuthEnableResponse { @@ -272,7 +272,7 @@ impl AuthStore { } /// Handle `AuthDisableRequest` - fn handle_auth_disable_request(&self, _req: &AuthDisableRequest) -> AuthDisableResponse { + fn handle_auth_disable_request(&self, _req: AuthDisableRequest) -> AuthDisableResponse { debug!("handle_auth_disable"); if !self.is_enabled() { debug!("auth is already disabled"); @@ -283,7 +283,7 @@ impl AuthStore { } /// Handle `AuthStatusRequest` - fn handle_auth_status_request(&self, _req: &AuthStatusRequest) -> AuthStatusResponse { + fn handle_auth_status_request(&self, _req: AuthStatusRequest) -> AuthStatusResponse { debug!("handle_auth_status"); AuthStatusResponse { header: Some(self.header_gen.gen_auth_header()), @@ -339,7 +339,7 @@ impl AuthStore { /// Handle `AuthUserListRequest` fn handle_user_list_request( &self, - _req: &AuthUserListRequest, + _req: AuthUserListRequest, ) -> Result { debug!("handle_user_list_request"); let users = self @@ -458,7 +458,7 @@ impl AuthStore { /// Handle `AuthRoleListRequest` fn handle_role_list_request( &self, - _req: &AuthRoleListRequest, + _req: AuthRoleListRequest, ) -> Result { debug!("handle_role_list_request"); let roles = self @@ -641,7 +641,7 @@ impl AuthStore { let user = User { name: req.name.as_str().into(), password: 
req.hashed_password.as_str().into(), - options: req.options.clone(), + options: req.options, roles: Vec::new(), }; ops.push(WriteOp::PutAuthRevision(revision)); @@ -969,7 +969,7 @@ impl AuthStore { self.check_txn_permission(username, txn_req)?; } RequestWrapper::LeaseRevokeRequest(ref lease_revoke_req) => { - self.check_lease_revoke_permission(username, lease_revoke_req)?; + self.check_lease_revoke_permission(username, *lease_revoke_req)?; } RequestWrapper::AuthUserGetRequest(ref user_get_req) => { self.check_admin_permission(username).map_or_else( @@ -1073,7 +1073,7 @@ impl AuthStore { fn check_lease_revoke_permission( &self, username: &str, - req: &LeaseRevokeRequest, + req: LeaseRevokeRequest, ) -> Result<(), ExecuteError> { self.check_lease(username, req.id) } diff --git a/crates/xline/src/storage/lease_store/mod.rs b/crates/xline/src/storage/lease_store/mod.rs index ed68a15bb..619bb67c6 100644 --- a/crates/xline/src/storage/lease_store/mod.rs +++ b/crates/xline/src/storage/lease_store/mod.rs @@ -199,11 +199,11 @@ impl LeaseStore { debug!("Receive LeaseGrantRequest {:?}", req); self.handle_lease_grant_request(req).map(Into::into) } - RequestWrapper::LeaseRevokeRequest(ref req) => { + RequestWrapper::LeaseRevokeRequest(req) => { debug!("Receive LeaseRevokeRequest {:?}", req); self.handle_lease_revoke_request(req).map(Into::into) } - RequestWrapper::LeaseLeasesRequest(ref req) => { + RequestWrapper::LeaseLeasesRequest(req) => { debug!("Receive LeaseLeasesRequest {:?}", req); Ok(self.handle_lease_leases_request(req).into()) } @@ -240,7 +240,7 @@ impl LeaseStore { /// Handle `LeaseRevokeRequest` fn handle_lease_revoke_request( &self, - req: &LeaseRevokeRequest, + req: LeaseRevokeRequest, ) -> Result { if self.lease_collection.contains_lease(req.id) { _ = self.unsynced_cache.write().insert(req.id); @@ -254,7 +254,7 @@ impl LeaseStore { } /// Handle `LeaseRevokeRequest` - fn handle_lease_leases_request(&self, _req: &LeaseLeasesRequest) -> LeaseLeasesResponse { + fn 
handle_lease_leases_request(&self, _req: LeaseLeasesRequest) -> LeaseLeasesResponse { let leases = self .leases() .into_iter() diff --git a/crates/xline/src/utils/metrics.rs b/crates/xline/src/utils/metrics.rs index 8d500dca7..3621936b6 100644 --- a/crates/xline/src/utils/metrics.rs +++ b/crates/xline/src/utils/metrics.rs @@ -1,3 +1,5 @@ +use std::net::SocketAddr; + use opentelemetry::global; use opentelemetry_otlp::WithExportConfig; use opentelemetry_sdk::{metrics::SdkMeterProvider, runtime::Tokio}; @@ -49,7 +51,7 @@ pub fn init_metrics(config: &MetricsConfig) -> anyhow::Result<()> { let provider = SdkMeterProvider::builder().with_reader(exporter).build(); global::set_meter_provider(provider); - let addr = format!("0.0.0.0:{}", config.port()) + let addr: SocketAddr = format!("0.0.0.0:{}", config.port()) .parse() .unwrap_or_else(|_| { unreachable!("local address 0.0.0.0:{} should be parsed", config.port()) @@ -57,9 +59,8 @@ pub fn init_metrics(config: &MetricsConfig) -> anyhow::Result<()> { info!("metrics server start on {addr:?}"); let app = axum::Router::new().route(config.path(), axum::routing::any(metrics)); let _ig = tokio::spawn(async move { - axum::Server::bind(&addr) - .serve(app.into_make_service()) - .await + let listener = real_tokio::net::TcpListener::bind(addr).await?; + axum::serve(listener, app).await }); Ok(()) diff --git a/crates/xline/src/utils/trace.rs b/crates/xline/src/utils/trace.rs index 9384626a1..cbfff13d3 100644 --- a/crates/xline/src/utils/trace.rs +++ b/crates/xline/src/utils/trace.rs @@ -1,9 +1,12 @@ use anyhow::{Ok, Result}; +use opentelemetry::global; +use opentelemetry::trace::TracerProvider; use opentelemetry_contrib::trace::exporter::jaeger_json::JaegerJsonExporter; use opentelemetry_sdk::runtime::Tokio; use tracing::warn; use tracing_appender::non_blocking::WorkerGuard; -use tracing_subscriber::{fmt::format, layer::SubscriberExt, util::SubscriberInitExt, Layer}; +use tracing_subscriber::layer::SubscriberExt; +use 
tracing_subscriber::{fmt::format, util::SubscriberInitExt, Layer}; use utils::config::{file_appender, LogConfig, RotationConfig, TraceConfig}; /// Return a Box trait from the config @@ -36,6 +39,10 @@ pub fn init_subscriber( .tracing() .with_exporter(otlp_exporter) .install_batch(Tokio) + .map(|provider| { + let _prev = global::set_tracer_provider(provider.clone()); + provider.tracer("xline") + }) .ok() }) .flatten() diff --git a/crates/xlineapi/Cargo.toml b/crates/xlineapi/Cargo.toml index bab2a98b0..1e984f799 100644 --- a/crates/xlineapi/Cargo.toml +++ b/crates/xlineapi/Cargo.toml @@ -15,15 +15,15 @@ async-trait = "0.1.80" curp = { path = "../curp" } curp-external-api = { path = "../curp-external-api" } itertools = "0.13" -prost = "0.12.3" +prost = "0.13" serde = { version = "1.0.204", features = ["derive"] } thiserror = "1.0.61" -tonic = { version = "0.4.2", package = "madsim-tonic" } +tonic = { version = "0.5.0", package = "madsim-tonic" } utils = { path = "../utils", features = ["parking_lot"] } workspace-hack = { version = "0.1", path = "../../workspace-hack" } [build-dependencies] -tonic-build = { version = "0.4.3", package = "madsim-tonic-build" } +tonic-build = { version = "0.5.0", package = "madsim-tonic-build" } [dev-dependencies] strum = "0.26" diff --git a/crates/xlinectl/Cargo.toml b/crates/xlinectl/Cargo.toml index c88b94dce..b90a9dea0 100644 --- a/crates/xlinectl/Cargo.toml +++ b/crates/xlinectl/Cargo.toml @@ -18,7 +18,7 @@ serde = { version = "1.0.204", features = ["derive"] } serde_json = "1.0.117" shlex = "1.3.0" tokio = "1" -tonic = { version = "0.4.2", package = "madsim-tonic" } +tonic = { version = "0.5.0", package = "madsim-tonic" } utils = { path = "../utils" } workspace-hack = { version = "0.1", path = "../../workspace-hack" } xline-client = { path = "../xline-client" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index c4c4905db..69ae00c51 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ 
-13,33 +13,35 @@ publish = false ### BEGIN HAKARI SECTION [dependencies] -axum = { version = "0.6" } +axum = { version = "0.7" } +axum-core = { version = "0.4", default-features = false, features = ["tracing"] } bytes = { version = "1" } clap = { version = "4", features = ["derive"] } -crossbeam-utils = { version = "0.8" } crypto-common = { version = "0.1", default-features = false, features = ["std"] } digest = { version = "0.10", features = ["mac", "std"] } either = { version = "1", default-features = false, features = ["use_std"] } futures-channel = { version = "0.3", features = ["sink"] } futures-util = { version = "0.3", features = ["channel", "io", "sink"] } getrandom = { version = "0.2", default-features = false, features = ["js", "rdrand", "std"] } +itertools = { version = "0.13" } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } -madsim-tokio = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic", default-features = false, features = ["fs", "io-util", "macros", "net", "rt", "rt-multi-thread", "signal", "sync", "time"] } -madsim-tonic = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic", default-features = false, features = ["tls"] } +madsim-tokio = { git = "https://github.com/LucienY01/madsim.git", branch = "bz/tonic-0-12", default-features = false, features = ["fs", "io-util", "macros", "net", "rt", "rt-multi-thread", "signal", "sync", "time"] } +madsim-tonic = { git = "https://github.com/LucienY01/madsim.git", branch = "bz/tonic-0-12", default-features = false, features = ["tls"] } memchr = { version = "2" } -num-traits = { version = "0.2", default-features = false, features = ["i128", "std"] } -opentelemetry_sdk = { version = "0.22", features = ["metrics", "rt-tokio"] } +opentelemetry_sdk = { version = "0.24", features = ["rt-tokio"] } petgraph = { version = "0.6" } predicates = { version = "3", default-features = false, 
features = ["diff"] } serde = { version = "1", features = ["derive", "rc"] } serde_json = { version = "1", features = ["raw_value"] } sha2 = { version = "0.10" } +smallvec = { version = "1", default-features = false, features = ["const_new"] } time = { version = "0.3", features = ["formatting", "macros", "parsing"] } tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "rt-multi-thread", "signal", "sync", "time"] } +tokio-stream = { version = "0.1", features = ["net"] } tokio-util = { version = "0.7", features = ["codec", "io"] } -tonic = { version = "0.11", features = ["tls"] } -tower = { version = "0.4", features = ["balance", "buffer", "filter", "limit", "timeout", "util"] } +tonic = { version = "0.12", features = ["tls"] } +tower = { version = "0.4", features = ["balance", "buffer", "filter", "limit", "util"] } tracing = { version = "0.1", features = ["log"] } tracing-log = { version = "0.2", default-features = false, features = ["log-tracer", "std"] } tracing-subscriber = { version = "0.3", features = ["env-filter", "time"] } @@ -49,7 +51,7 @@ zeroize = { version = "1", features = ["derive"] } bytes = { version = "1" } cc = { version = "1", default-features = false, features = ["parallel"] } either = { version = "1", default-features = false, features = ["use_std"] } -itertools = { version = "0.12", default-features = false, features = ["use_alloc"] } +itertools = { version = "0.13" } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } memchr = { version = "2" } From 8852084ef85b2e457475219e552c08fd0990d54f Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Thu, 18 Jul 2024 21:56:36 +0800 Subject: [PATCH 025/322] refactor(client)!: refactor MaintenanceClient::alarm Signed-off-by: lxl66566 --- .../xline-client/src/clients/maintenance.rs | 22 +++++++++++++++---- crates/xline-client/src/types/maintenance.rs | 1 - crates/xline-client/src/types/mod.rs | 2 -- 
crates/xline/tests/it/maintenance_test.rs | 4 ++-- 4 files changed, 20 insertions(+), 9 deletions(-) delete mode 100644 crates/xline-client/src/types/maintenance.rs diff --git a/crates/xline-client/src/clients/maintenance.rs b/crates/xline-client/src/clients/maintenance.rs index 8d2ae1718..c2b7e1bd5 100644 --- a/crates/xline-client/src/clients/maintenance.rs +++ b/crates/xline-client/src/clients/maintenance.rs @@ -2,7 +2,8 @@ use std::{fmt::Debug, sync::Arc}; use tonic::{transport::Channel, Streaming}; use xlineapi::{ - AlarmRequest, AlarmResponse, SnapshotRequest, SnapshotResponse, StatusRequest, StatusResponse, + AlarmAction, AlarmRequest, AlarmResponse, AlarmType, SnapshotRequest, SnapshotResponse, + StatusRequest, StatusResponse, }; use crate::{error::Result, AuthService}; @@ -95,14 +96,27 @@ impl MaintenanceClient { /// .await? /// .maintenance_client(); /// - /// client.alarm(AlarmRequest::new(AlarmAction::Get, 0, AlarmType::None)).await?; + /// client.alarm(AlarmAction::Get, 0, AlarmType::None).await?; /// /// Ok(()) /// } /// ``` #[inline] - pub async fn alarm(&mut self, request: AlarmRequest) -> Result { - Ok(self.inner.alarm(request).await?.into_inner()) + pub async fn alarm( + &mut self, + action: AlarmAction, + member_id: u64, + alarm_type: AlarmType, + ) -> Result { + Ok(self + .inner + .alarm(AlarmRequest { + action: action.into(), + member_id, + alarm: alarm_type.into(), + }) + .await? 
+ .into_inner()) } /// Sends a status request diff --git a/crates/xline-client/src/types/maintenance.rs b/crates/xline-client/src/types/maintenance.rs deleted file mode 100644 index 44dead5f0..000000000 --- a/crates/xline-client/src/types/maintenance.rs +++ /dev/null @@ -1 +0,0 @@ -pub use xlineapi::SnapshotResponse; diff --git a/crates/xline-client/src/types/mod.rs b/crates/xline-client/src/types/mod.rs index c1bec0e75..0ba876eee 100644 --- a/crates/xline-client/src/types/mod.rs +++ b/crates/xline-client/src/types/mod.rs @@ -6,8 +6,6 @@ pub mod cluster; pub mod kv; /// Lease type definitions pub mod lease; -/// Maintenance type definitions. -pub mod maintenance; /// Range Option definitions, to build a `range_end` from key. pub mod range_end; /// Watch type definitions. diff --git a/crates/xline/tests/it/maintenance_test.rs b/crates/xline/tests/it/maintenance_test.rs index eb0f5ed0e..77cde73cd 100644 --- a/crates/xline/tests/it/maintenance_test.rs +++ b/crates/xline/tests/it/maintenance_test.rs @@ -6,7 +6,7 @@ use tokio::io::AsyncWriteExt; use xline::restore::restore; use xline_client::error::XlineClientError; use xline_test_utils::{Client, ClientOptions, Cluster}; -use xlineapi::{execute_error::ExecuteError, AlarmAction, AlarmRequest, AlarmType}; +use xlineapi::{execute_error::ExecuteError, AlarmAction, AlarmType}; #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] @@ -92,7 +92,7 @@ async fn test_alarm(idx: usize) { } tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; let res = m_client - .alarm(AlarmRequest::new(AlarmAction::Get, 0, AlarmType::None)) + .alarm(AlarmAction::Get, 0, AlarmType::None) .await .unwrap(); assert!(!res.alarms.is_empty()); From 4bc9bc1a77e6a0e38840e488613810cd463aca70 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Thu, 18 Jul 2024 14:57:09 +0800 Subject: [PATCH 026/322] refactor(client)!: ClusterClient::* Signed-off-by: lxl66566 refactor(client)!: ClusterClient::member_remove Signed-off-by: lxl66566 
refactor(client)!: ClusterClient::member_promote Signed-off-by: lxl66566 refactor(client)!: ClusterClient::member_update Signed-off-by: lxl66566 refactor(client)!: ClusterClient::member_list Signed-off-by: lxl66566 refactor(client)!: modify code in simulation Signed-off-by: lxl66566 fix: ci fail Signed-off-by: lxl66566 --- crates/simulation/src/xline_group.rs | 25 ++-- crates/simulation/tests/it/xline.rs | 22 +-- crates/xline-client/examples/cluster.rs | 30 +--- crates/xline-client/src/clients/cluster.rs | 66 ++++----- crates/xline-client/src/types/cluster.rs | 133 ------------------ crates/xline-client/src/types/mod.rs | 2 - crates/xline/tests/it/cluster_test.rs | 31 ++-- crates/xlinectl/src/command/member/add.rs | 18 ++- crates/xlinectl/src/command/member/list.rs | 18 ++- crates/xlinectl/src/command/member/promote.rs | 15 +- crates/xlinectl/src/command/member/remove.rs | 15 +- crates/xlinectl/src/command/member/update.rs | 18 ++- 12 files changed, 109 insertions(+), 284 deletions(-) delete mode 100644 crates/xline-client/src/types/cluster.rs diff --git a/crates/simulation/src/xline_group.rs b/crates/simulation/src/xline_group.rs index bd6e7ae8b..e0229f6f5 100644 --- a/crates/simulation/src/xline_group.rs +++ b/crates/simulation/src/xline_group.rs @@ -12,13 +12,15 @@ use xline::server::XlineServer; use xline_client::{ error::XlineClientError, types::{ - cluster::{MemberAddRequest, MemberAddResponse, MemberListRequest, MemberListResponse}, kv::{CompactionResponse, PutOptions, PutResponse, RangeOptions, RangeResponse}, watch::{WatchOptions, WatchStreaming, Watcher}, }, Client, ClientOptions, }; -use xlineapi::{command::Command, ClusterClient, KvClient, RequestUnion, WatchClient}; +use xlineapi::{ + command::Command, ClusterClient, KvClient, MemberAddResponse, MemberListResponse, RequestUnion, + WatchClient, +}; pub struct XlineNode { pub client_url: String, @@ -340,15 +342,20 @@ impl SimEtcdClient { .unwrap() } - pub async fn member_add( - &self, - request: 
MemberAddRequest, + pub async fn member_add>( + &mut self, + peer_urls: impl Into>, + is_learner: bool, ) -> Result> { let mut client = self.cluster.clone(); + let peer_urls: Vec = peer_urls.into().into_iter().map(Into::into).collect(); self.handle .spawn(async move { client - .member_add(xlineapi::MemberAddRequest::from(request)) + .member_add(xlineapi::MemberAddRequest { + peer_ur_ls: peer_urls, + is_learner, + }) .await .map(|r| r.into_inner()) .map_err(Into::into) @@ -358,14 +365,14 @@ impl SimEtcdClient { } pub async fn member_list( - &self, - request: MemberListRequest, + &mut self, + linearizable: bool, ) -> Result> { let mut client = self.cluster.clone(); self.handle .spawn(async move { client - .member_list(xlineapi::MemberListRequest::from(request)) + .member_list(xlineapi::MemberListRequest { linearizable }) .await .map(|r| r.into_inner()) .map_err(Into::into) diff --git a/crates/simulation/tests/it/xline.rs b/crates/simulation/tests/it/xline.rs index 8bab83021..2e9b48501 100644 --- a/crates/simulation/tests/it/xline.rs +++ b/crates/simulation/tests/it/xline.rs @@ -3,10 +3,7 @@ use std::time::Duration; use curp_test_utils::init_logger; use madsim::time::sleep; use simulation::xline_group::{SimEtcdClient, XlineGroup}; -use xline_client::types::{ - cluster::{MemberAddRequest, MemberListRequest}, - watch::WatchOptions, -}; +use xline_client::types::watch::WatchOptions; // TODO: Add more tests if needed @@ -49,29 +46,20 @@ async fn xline_members_restore() { let mut group = XlineGroup::new(3).await; let node = group.get_node("S1"); let addr = node.client_url.clone(); - let client = SimEtcdClient::new(addr, group.client_handle.clone()).await; + let mut client = SimEtcdClient::new(addr, group.client_handle.clone()).await; let res = client - .member_add(MemberAddRequest::new( - vec!["http://192.168.1.4:12345".to_owned()], - true, - )) + .member_add(["http://192.168.1.4:12345"], true) .await .unwrap(); assert_eq!(res.members.len(), 4); - let members = client - 
.member_list(MemberListRequest::new(false)) - .await - .unwrap(); + let members = client.member_list(false).await.unwrap(); assert_eq!(members.members.len(), 4); group.crash("S1").await; sleep(Duration::from_secs(10)).await; group.restart("S1").await; sleep(Duration::from_secs(10)).await; - let members = client - .member_list(MemberListRequest::new(false)) - .await - .unwrap(); + let members = client.member_list(false).await.unwrap(); assert_eq!(members.members.len(), 4); } diff --git a/crates/xline-client/examples/cluster.rs b/crates/xline-client/examples/cluster.rs index ce52558c8..859afdf6b 100644 --- a/crates/xline-client/examples/cluster.rs +++ b/crates/xline-client/examples/cluster.rs @@ -1,11 +1,5 @@ use anyhow::Result; -use xline_client::{ - types::cluster::{ - MemberAddRequest, MemberListRequest, MemberPromoteRequest, MemberRemoveRequest, - MemberUpdateRequest, - }, - Client, ClientOptions, -}; +use xline_client::{Client, ClientOptions}; #[tokio::main] async fn main() -> Result<()> { @@ -17,7 +11,7 @@ async fn main() -> Result<()> { .cluster_client(); // send a linearizable member list request - let resp = client.member_list(MemberListRequest::new(true)).await?; + let resp = client.member_list(true).await?; println!("members: {:?}", resp.members); // whether the added member is a learner. 
@@ -25,36 +19,24 @@ async fn main() -> Result<()> { let is_learner = true; // add a normal node into the cluster - let resp = client - .member_add(MemberAddRequest::new( - vec!["127.0.0.1:2379".to_owned()], - is_learner, - )) - .await?; + let resp = client.member_add(["127.0.0.1:2379"], is_learner).await?; let added_member = resp.member.unwrap(); println!("members: {:?}, added: {}", resp.members, added_member.id); if is_learner { // promote the learner to a normal node - let resp = client - .member_promote(MemberPromoteRequest::new(added_member.id)) - .await?; + let resp = client.member_promote(added_member.id).await?; println!("members: {:?}", resp.members); } // update the peer_ur_ls of the added member if the network topology has changed. let resp = client - .member_update(MemberUpdateRequest::new( - added_member.id, - vec!["127.0.0.2:2379".to_owned()], - )) + .member_update(added_member.id, ["127.0.0.2:2379"]) .await?; println!("members: {:?}", resp.members); // remove the member from the cluster if it is no longer needed. 
- let resp = client - .member_remove(MemberRemoveRequest::new(added_member.id)) - .await?; + let resp = client.member_remove(added_member.id).await?; println!("members: {:?}", resp.members); Ok(()) diff --git a/crates/xline-client/src/clients/cluster.rs b/crates/xline-client/src/clients/cluster.rs index a4ec0cc0c..545d28510 100644 --- a/crates/xline-client/src/clients/cluster.rs +++ b/crates/xline-client/src/clients/cluster.rs @@ -2,14 +2,10 @@ use std::sync::Arc; use tonic::transport::Channel; -use crate::{ - error::Result, - types::cluster::{ - MemberAddRequest, MemberAddResponse, MemberListRequest, MemberListResponse, - MemberPromoteRequest, MemberPromoteResponse, MemberRemoveRequest, MemberRemoveResponse, - MemberUpdateRequest, MemberUpdateResponse, - }, - AuthService, +use crate::{error::Result, AuthService}; +use xlineapi::{ + MemberAddResponse, MemberListResponse, MemberPromoteResponse, MemberRemoveResponse, + MemberUpdateResponse, }; /// Client for Cluster operations. @@ -47,7 +43,6 @@ impl ClusterClient { /// /// ```no_run /// use xline_client::{Client, ClientOptions}; - /// use xline_client::types::cluster::*; /// use anyhow::Result; /// /// #[tokio::main] @@ -58,7 +53,7 @@ impl ClusterClient { /// .await? /// .cluster_client(); /// - /// let resp = client.member_add(MemberAddRequest::new(vec!["127.0.0.1:2380".to_owned()], true)).await?; + /// let resp = client.member_add(["127.0.0.1:2380"], true).await?; /// /// println!( /// "members: {:?}, added: {:?}", @@ -69,10 +64,17 @@ impl ClusterClient { /// } /// ``` #[inline] - pub async fn member_add(&mut self, request: MemberAddRequest) -> Result { + pub async fn member_add>( + &mut self, + peer_urls: impl Into>, + is_learner: bool, + ) -> Result { Ok(self .inner - .member_add(xlineapi::MemberAddRequest::from(request)) + .member_add(xlineapi::MemberAddRequest { + peer_ur_ls: peer_urls.into().into_iter().map(Into::into).collect(), + is_learner, + }) .await? 
.into_inner()) } @@ -87,7 +89,6 @@ impl ClusterClient { /// /// ```no_run /// use xline_client::{Client, ClientOptions}; - /// use xline_client::types::cluster::*; /// use anyhow::Result; /// /// #[tokio::main] @@ -97,7 +98,7 @@ impl ClusterClient { /// let mut client = Client::connect(curp_members, ClientOptions::default()) /// .await? /// .cluster_client(); - /// let resp = client.member_remove(MemberRemoveRequest::new(1)).await?; + /// let resp = client.member_remove(1).await?; /// /// println!("members: {:?}", resp.members); /// @@ -105,13 +106,10 @@ impl ClusterClient { /// } /// #[inline] - pub async fn member_remove( - &mut self, - request: MemberRemoveRequest, - ) -> Result { + pub async fn member_remove(&mut self, id: u64) -> Result { Ok(self .inner - .member_remove(xlineapi::MemberRemoveRequest::from(request)) + .member_remove(xlineapi::MemberRemoveRequest { id }) .await? .into_inner()) } @@ -126,7 +124,6 @@ impl ClusterClient { /// /// ```no_run /// use xline_client::{Client, ClientOptions}; - /// use xline_client::types::cluster::*; /// use anyhow::Result; /// /// #[tokio::main] @@ -136,7 +133,7 @@ impl ClusterClient { /// let mut client = Client::connect(curp_members, ClientOptions::default()) /// .await? /// .cluster_client(); - /// let resp = client.member_promote(MemberPromoteRequest::new(1)).await?; + /// let resp = client.member_promote(1).await?; /// /// println!("members: {:?}", resp.members); /// @@ -144,13 +141,10 @@ impl ClusterClient { /// } /// #[inline] - pub async fn member_promote( - &mut self, - request: MemberPromoteRequest, - ) -> Result { + pub async fn member_promote(&mut self, id: u64) -> Result { Ok(self .inner - .member_promote(xlineapi::MemberPromoteRequest::from(request)) + .member_promote(xlineapi::MemberPromoteRequest { id }) .await? 
.into_inner()) } @@ -165,7 +159,6 @@ impl ClusterClient { /// /// ```no_run /// use xline_client::{Client, ClientOptions}; - /// use xline_client::types::cluster::*; /// use anyhow::Result; /// /// #[tokio::main] @@ -175,7 +168,7 @@ impl ClusterClient { /// let mut client = Client::connect(curp_members, ClientOptions::default()) /// .await? /// .cluster_client(); - /// let resp = client.member_update(MemberUpdateRequest::new(1, vec!["127.0.0.1:2379".to_owned()])).await?; + /// let resp = client.member_update(1, ["127.0.0.1:2379"]).await?; /// /// println!("members: {:?}", resp.members); /// @@ -183,13 +176,17 @@ impl ClusterClient { /// } /// #[inline] - pub async fn member_update( + pub async fn member_update>( &mut self, - request: MemberUpdateRequest, + id: u64, + peer_urls: impl Into>, ) -> Result { Ok(self .inner - .member_update(xlineapi::MemberUpdateRequest::from(request)) + .member_update(xlineapi::MemberUpdateRequest { + id, + peer_ur_ls: peer_urls.into().into_iter().map(Into::into).collect(), + }) .await? .into_inner()) } @@ -204,7 +201,6 @@ impl ClusterClient { /// /// ```no_run /// use xline_client::{Client, ClientOptions}; - /// use xline_client::types::cluster::*; /// use anyhow::Result; /// /// #[tokio::main] @@ -214,17 +210,17 @@ impl ClusterClient { /// let mut client = Client::connect(curp_members, ClientOptions::default()) /// .await? /// .cluster_client(); - /// let resp = client.member_list(MemberListRequest::new(false)).await?; + /// let resp = client.member_list(false).await?; /// /// println!("members: {:?}", resp.members); /// /// Ok(()) /// } #[inline] - pub async fn member_list(&mut self, request: MemberListRequest) -> Result { + pub async fn member_list(&mut self, linearizable: bool) -> Result { Ok(self .inner - .member_list(xlineapi::MemberListRequest::from(request)) + .member_list(xlineapi::MemberListRequest { linearizable }) .await? 
.into_inner()) } diff --git a/crates/xline-client/src/types/cluster.rs b/crates/xline-client/src/types/cluster.rs deleted file mode 100644 index 7a3a36f27..000000000 --- a/crates/xline-client/src/types/cluster.rs +++ /dev/null @@ -1,133 +0,0 @@ -pub use xlineapi::{ - Cluster, Member, MemberAddResponse, MemberListResponse, MemberPromoteResponse, - MemberRemoveResponse, MemberUpdateResponse, -}; - -/// Request for `MemberAdd` -#[derive(Debug, PartialEq)] -pub struct MemberAddRequest { - /// The inner request - inner: xlineapi::MemberAddRequest, -} - -impl MemberAddRequest { - /// Creates a new `MemberAddRequest` - #[inline] - pub fn new(peer_ur_ls: impl Into>, is_learner: bool) -> Self { - Self { - inner: xlineapi::MemberAddRequest { - peer_ur_ls: peer_ur_ls.into(), - is_learner, - }, - } - } -} - -impl From for xlineapi::MemberAddRequest { - #[inline] - fn from(req: MemberAddRequest) -> Self { - req.inner - } -} - -/// Request for `MemberList` -#[derive(Debug, PartialEq, Clone, Copy)] -pub struct MemberListRequest { - /// The inner request - inner: xlineapi::MemberListRequest, -} - -impl MemberListRequest { - /// Creates a new `MemberListRequest` - #[inline] - #[must_use] - pub fn new(linearizable: bool) -> Self { - Self { - inner: xlineapi::MemberListRequest { linearizable }, - } - } -} - -impl From for xlineapi::MemberListRequest { - #[inline] - fn from(req: MemberListRequest) -> Self { - req.inner - } -} - -/// Request for `MemberPromote` -#[derive(Debug, PartialEq, Clone, Copy)] -pub struct MemberPromoteRequest { - /// The inner request - inner: xlineapi::MemberPromoteRequest, -} - -impl MemberPromoteRequest { - /// Creates a new `MemberPromoteRequest` - #[inline] - #[must_use] - pub fn new(id: u64) -> Self { - Self { - inner: xlineapi::MemberPromoteRequest { id }, - } - } -} - -impl From for xlineapi::MemberPromoteRequest { - #[inline] - fn from(req: MemberPromoteRequest) -> Self { - req.inner - } -} - -/// Request for `MemberRemove` -#[derive(Debug, PartialEq, 
Clone, Copy)] -pub struct MemberRemoveRequest { - /// The inner request - inner: xlineapi::MemberRemoveRequest, -} - -impl MemberRemoveRequest { - /// Creates a new `MemberRemoveRequest` - #[inline] - #[must_use] - pub fn new(id: u64) -> Self { - Self { - inner: xlineapi::MemberRemoveRequest { id }, - } - } -} - -impl From for xlineapi::MemberRemoveRequest { - #[inline] - fn from(req: MemberRemoveRequest) -> Self { - req.inner - } -} - -/// Request for `MemberUpdate` -#[derive(Debug, PartialEq)] -pub struct MemberUpdateRequest { - /// The inner request - inner: xlineapi::MemberUpdateRequest, -} - -impl MemberUpdateRequest { - /// Creates a new `MemberUpdateRequest` - #[inline] - pub fn new(id: u64, peer_ur_ls: impl Into>) -> Self { - Self { - inner: xlineapi::MemberUpdateRequest { - id, - peer_ur_ls: peer_ur_ls.into(), - }, - } - } -} - -impl From for xlineapi::MemberUpdateRequest { - #[inline] - fn from(req: MemberUpdateRequest) -> Self { - req.inner - } -} diff --git a/crates/xline-client/src/types/mod.rs b/crates/xline-client/src/types/mod.rs index 0ba876eee..b894ebc82 100644 --- a/crates/xline-client/src/types/mod.rs +++ b/crates/xline-client/src/types/mod.rs @@ -1,7 +1,5 @@ /// Auth type definitions. pub mod auth; -/// Cluster type definitions. -pub mod cluster; /// Kv type definitions. 
pub mod kv; /// Lease type definitions diff --git a/crates/xline/tests/it/cluster_test.rs b/crates/xline/tests/it/cluster_test.rs index a9f9087a4..9c435859e 100644 --- a/crates/xline/tests/it/cluster_test.rs +++ b/crates/xline/tests/it/cluster_test.rs @@ -2,12 +2,7 @@ use std::{error::Error, time::Duration}; use test_macros::abort_on_panic; use tokio::{net::TcpListener, time::sleep}; -use xline_client::{ - types::cluster::{ - MemberAddRequest, MemberListRequest, MemberRemoveRequest, MemberUpdateRequest, - }, - Client, ClientOptions, -}; +use xline_client::{Client, ClientOptions}; use xline_test_utils::Cluster; #[tokio::test(flavor = "multi_thread")] @@ -18,13 +13,10 @@ async fn xline_remove_node() -> Result<(), Box> { let mut cluster_client = Client::connect(cluster.all_client_addrs(), ClientOptions::default()) .await? .cluster_client(); - let list_res = cluster_client - .member_list(MemberListRequest::new(false)) - .await?; + let list_res = cluster_client.member_list(false).await?; assert_eq!(list_res.members.len(), 5); let remove_id = list_res.members[0].id; - let remove_req = MemberRemoveRequest::new(remove_id); - let remove_res = cluster_client.member_remove(remove_req).await?; + let remove_res = cluster_client.member_remove(remove_id).await?; assert_eq!(remove_res.members.len(), 4); assert!(remove_res.members.iter().all(|m| m.id != remove_id)); Ok(()) @@ -43,8 +35,7 @@ async fn xline_add_node() -> Result<(), Box> { let new_node_peer_urls = vec![format!("http://{}", new_node_peer_listener.local_addr()?)]; let new_node_client_listener = TcpListener::bind("0.0.0.0:0").await?; let new_node_client_urls = vec![format!("http://{}", new_node_client_listener.local_addr()?)]; - let add_req = MemberAddRequest::new(new_node_peer_urls.clone(), false); - let add_res = cluster_client.member_add(add_req).await?; + let add_res = cluster_client.member_add(new_node_peer_urls, false).await?; assert_eq!(add_res.members.len(), 4); cluster .run_node(new_node_client_listener, 
new_node_peer_listener) @@ -61,9 +52,7 @@ async fn xline_update_node() -> Result<(), Box> { let mut cluster = Cluster::new(3).await; cluster.start().await; let mut cluster_client = cluster.client().await.cluster_client(); - let old_list_res = cluster_client - .member_list(MemberListRequest::new(false)) - .await?; + let old_list_res = cluster_client.member_list(false).await?; assert_eq!(old_list_res.members.len(), 3); let update_id = old_list_res.members[0].id; let port = old_list_res.members[0] @@ -75,14 +64,12 @@ async fn xline_update_node() -> Result<(), Box> { .unwrap() .parse::() .unwrap(); - let update_req = - MemberUpdateRequest::new(update_id, vec![format!("http://localhost:{}", port)]); - let update_res = cluster_client.member_update(update_req).await?; + let update_res = cluster_client + .member_update(update_id, [format!("http://localhost:{}", port)]) + .await?; assert_eq!(update_res.members.len(), 3); sleep(Duration::from_secs(3)).await; - let new_list_res = cluster_client - .member_list(MemberListRequest::new(false)) - .await?; + let new_list_res = cluster_client.member_list(false).await?; assert_eq!(new_list_res.members.len(), 3); let old_addr = &old_list_res .members diff --git a/crates/xlinectl/src/command/member/add.rs b/crates/xlinectl/src/command/member/add.rs index e0771f66b..e16e04d97 100644 --- a/crates/xlinectl/src/command/member/add.rs +++ b/crates/xlinectl/src/command/member/add.rs @@ -1,9 +1,12 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{error::Result, types::cluster::MemberAddRequest, Client}; +use xline_client::{error::Result, Client}; use super::parse_peer_urls; use crate::utils::printer::Printer; +/// Temp type for cluster member `add` command, indicates `(peer_urls, is_learner)` +type MemberAddRequest = (Vec, bool); + /// Definition of `add` command pub(super) fn command() -> Command { Command::new("add") @@ -22,13 +25,16 @@ pub(super) fn build_request(matches: &ArgMatches) -> MemberAddRequest { .expect("required"); 
let is_learner = matches.get_flag("is_learner"); - MemberAddRequest::new(peer_urls.clone(), is_learner) + (peer_urls.clone(), is_learner) } /// Execute the command pub(super) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { let request = build_request(matches); - let resp = client.cluster_client().member_add(request).await?; + let resp = client + .cluster_client() + .member_add(request.0, request.1) + .await?; resp.print(); Ok(()) @@ -46,12 +52,12 @@ mod tests { let test_cases = vec![ TestCase::new( vec!["add", "127.0.0.1:2379", "--is_learner"], - Some(MemberAddRequest::new(["127.0.0.1:2379".to_owned()], true)), + Some((["127.0.0.1:2379".to_owned()].into(), true)), ), TestCase::new( vec!["add", "127.0.0.1:2379,127.0.0.1:2380"], - Some(MemberAddRequest::new( - ["127.0.0.1:2379".to_owned(), "127.0.0.1:2380".to_owned()], + Some(( + ["127.0.0.1:2379".to_owned(), "127.0.0.1:2380".to_owned()].into(), false, )), ), diff --git a/crates/xlinectl/src/command/member/list.rs b/crates/xlinectl/src/command/member/list.rs index 7612783f9..269a7365d 100644 --- a/crates/xlinectl/src/command/member/list.rs +++ b/crates/xlinectl/src/command/member/list.rs @@ -1,5 +1,5 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{error::Result, types::cluster::MemberListRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; @@ -11,10 +11,8 @@ pub(super) fn command() -> Command { } /// Build request from matches -pub(super) fn build_request(matches: &ArgMatches) -> MemberListRequest { - let linearizable = matches.get_flag("linearizable"); - - MemberListRequest::new(linearizable) +pub(super) fn build_request(matches: &ArgMatches) -> bool { + matches.get_flag("linearizable") } /// Execute the command @@ -31,14 +29,14 @@ mod tests { use super::*; use crate::test_case_struct; - test_case_struct!(MemberListRequest); + test_case_struct!(bool); #[test] fn command_parse_should_be_valid() { - let test_cases = 
vec![TestCase::new( - vec!["list", "--linearizable"], - Some(MemberListRequest::new(true)), - )]; + let test_cases = vec![ + TestCase::new(vec!["list", "--linearizable"], Some(true)), + TestCase::new(vec!["list"], Some(false)), + ]; for case in test_cases { case.run_test(); diff --git a/crates/xlinectl/src/command/member/promote.rs b/crates/xlinectl/src/command/member/promote.rs index 4d5e9de53..3e4be7da1 100644 --- a/crates/xlinectl/src/command/member/promote.rs +++ b/crates/xlinectl/src/command/member/promote.rs @@ -1,5 +1,5 @@ use clap::{arg, value_parser, ArgMatches, Command}; -use xline_client::{error::Result, types::cluster::MemberPromoteRequest, Client}; +use xline_client::{error::Result, Client}; use crate::utils::printer::Printer; @@ -11,10 +11,8 @@ pub(super) fn command() -> Command { } /// Build request from matches -pub(super) fn build_request(matches: &ArgMatches) -> MemberPromoteRequest { - let member_id = matches.get_one::("ID").expect("required"); - - MemberPromoteRequest::new(*member_id) +pub(super) fn build_request(matches: &ArgMatches) -> u64 { + *matches.get_one::("ID").expect("required") } /// Execute the command @@ -31,14 +29,11 @@ mod tests { use super::*; use crate::test_case_struct; - test_case_struct!(MemberPromoteRequest); + test_case_struct!(u64); #[test] fn command_parse_should_be_valid() { - let test_cases = vec![TestCase::new( - vec!["remove", "1"], - Some(MemberPromoteRequest::new(1)), - )]; + let test_cases = vec![TestCase::new(vec!["remove", "1"], Some(1))]; for case in test_cases { case.run_test(); diff --git a/crates/xlinectl/src/command/member/remove.rs b/crates/xlinectl/src/command/member/remove.rs index 667e762cd..b13a49015 100644 --- a/crates/xlinectl/src/command/member/remove.rs +++ b/crates/xlinectl/src/command/member/remove.rs @@ -1,5 +1,5 @@ use clap::{arg, value_parser, ArgMatches, Command}; -use xline_client::{error::Result, types::cluster::MemberRemoveRequest, Client}; +use xline_client::{error::Result, Client}; use 
crate::utils::printer::Printer; @@ -11,10 +11,8 @@ pub(super) fn command() -> Command { } /// Build request from matches -pub(super) fn build_request(matches: &ArgMatches) -> MemberRemoveRequest { - let member_id = matches.get_one::("ID").expect("required"); - - MemberRemoveRequest::new(*member_id) +pub(super) fn build_request(matches: &ArgMatches) -> u64 { + *matches.get_one::("ID").expect("required") } /// Execute the command @@ -31,14 +29,11 @@ mod tests { use super::*; use crate::test_case_struct; - test_case_struct!(MemberRemoveRequest); + test_case_struct!(u64); #[test] fn command_parse_should_be_valid() { - let test_cases = vec![TestCase::new( - vec!["remove", "1"], - Some(MemberRemoveRequest::new(1)), - )]; + let test_cases = vec![TestCase::new(vec!["remove", "1"], Some(1))]; for case in test_cases { case.run_test(); diff --git a/crates/xlinectl/src/command/member/update.rs b/crates/xlinectl/src/command/member/update.rs index 59fc9f310..17db4566a 100644 --- a/crates/xlinectl/src/command/member/update.rs +++ b/crates/xlinectl/src/command/member/update.rs @@ -1,9 +1,12 @@ use clap::{arg, value_parser, ArgMatches, Command}; -use xline_client::{error::Result, types::cluster::MemberUpdateRequest, Client}; +use xline_client::{error::Result, Client}; use super::parse_peer_urls; use crate::utils::printer::Printer; +/// Temp type for request and testing, indicates `(id, peer_urls)` +type MemberUpdateRequest = (u64, Vec); + /// Definition of `update` command pub(super) fn command() -> Command { Command::new("update") @@ -22,13 +25,16 @@ pub(super) fn build_request(matches: &ArgMatches) -> MemberUpdateRequest { .get_one::>("peer_urls") .expect("required"); - MemberUpdateRequest::new(*member_id, peer_urls.clone()) + (*member_id, peer_urls.clone()) } /// Execute the command pub(super) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { let request = build_request(matches); - let resp = client.cluster_client().member_update(request).await?; + let 
resp = client + .cluster_client() + .member_update(request.0, request.1) + .await?; resp.print(); Ok(()) @@ -46,13 +52,13 @@ mod tests { let test_cases = vec![ TestCase::new( vec!["update", "1", "127.0.0.1:2379"], - Some(MemberUpdateRequest::new(1, ["127.0.0.1:2379".to_owned()])), + Some((1, ["127.0.0.1:2379".to_owned()].into())), ), TestCase::new( vec!["update", "2", "127.0.0.1:2379,127.0.0.1:2380"], - Some(MemberUpdateRequest::new( + Some(( 2, - ["127.0.0.1:2379".to_owned(), "127.0.0.1:2380".to_owned()], + ["127.0.0.1:2379".to_owned(), "127.0.0.1:2380".to_owned()].into(), )), ), ]; From 62088fc2ead63a8f5766a2132a9eb44596b76a49 Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Thu, 18 Jul 2024 08:44:36 +0800 Subject: [PATCH 027/322] refactor(client)!: AuthClient::role_grant_permission Signed-off-by: lxl66566 --- crates/xline-client/examples/auth_role.rs | 14 +-- crates/xline-client/src/clients/auth.rs | 34 ++++-- crates/xline-client/src/types/auth.rs | 104 +++++++----------- crates/xline-client/tests/it/auth.rs | 56 +++++----- crates/xline-test-utils/src/lib.rs | 10 +- .../xlinectl/src/command/role/grant_perm.rs | 63 ++++++----- 6 files changed, 134 insertions(+), 147 deletions(-) diff --git a/crates/xline-client/examples/auth_role.rs b/crates/xline-client/examples/auth_role.rs index 70e146f72..a23e686a8 100644 --- a/crates/xline-client/examples/auth_role.rs +++ b/crates/xline-client/examples/auth_role.rs @@ -1,8 +1,6 @@ use anyhow::Result; use xline_client::{ - types::auth::{ - AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, Permission, PermissionType, - }, + types::auth::{AuthRoleRevokePermissionRequest, PermissionType}, Client, ClientOptions, }; @@ -21,16 +19,10 @@ async fn main() -> Result<()> { // grant permissions to roles client - .role_grant_permission(AuthRoleGrantPermissionRequest::new( - "role1", - Permission::new(PermissionType::Read, "key1"), - )) + .role_grant_permission("role1", PermissionType::Read, "key1", None) .await?; client - 
.role_grant_permission(AuthRoleGrantPermissionRequest::new( - "role2", - Permission::new(PermissionType::Readwrite, "key2"), - )) + .role_grant_permission("role2", PermissionType::Readwrite, "key2", None) .await?; // list all roles and their permissions diff --git a/crates/xline-client/src/clients/auth.rs b/crates/xline-client/src/clients/auth.rs index b4f845e8a..44038cc28 100644 --- a/crates/xline-client/src/clients/auth.rs +++ b/crates/xline-client/src/clients/auth.rs @@ -13,7 +13,10 @@ use xlineapi::{ use crate::{ error::{Result, XlineClientError}, - types::auth::{AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest}, + types::{ + auth::{AuthRoleRevokePermissionRequest, Permission, PermissionType}, + range_end::RangeOption, + }, AuthService, CurpClient, }; @@ -660,7 +663,7 @@ impl AuthClient { /// /// ```no_run /// use xline_client::{ - /// types::auth::{AuthRoleGrantPermissionRequest, Permission, PermissionType}, + /// types::auth::{Permission, PermissionType}, /// Client, ClientOptions, /// }; /// use anyhow::Result; @@ -676,10 +679,12 @@ impl AuthClient { /// // add the role and key /// /// client - /// .role_grant_permission(AuthRoleGrantPermissionRequest::new( + /// .role_grant_permission( /// "role", - /// Permission::new(PermissionType::Read, "key"), - /// )) + /// PermissionType::Read, + /// "key", + /// None + /// ) /// .await?; /// /// Ok(()) @@ -688,14 +693,19 @@ impl AuthClient { #[inline] pub async fn role_grant_permission( &self, - request: AuthRoleGrantPermissionRequest, + name: impl Into, + perm_type: PermissionType, + perm_key: impl Into>, + range_option: Option, ) -> Result { - if request.inner.perm.is_none() { - return Err(XlineClientError::InvalidArgs(String::from( - "Permission not given", - ))); - } - self.handle_req(request.inner, false).await + self.handle_req( + xlineapi::AuthRoleGrantPermissionRequest { + name: name.into(), + perm: Some(Permission::new(perm_type, perm_key.into(), range_option).into()), + }, + false, + ) + 
.await } /// Revokes role permission. diff --git a/crates/xline-client/src/types/auth.rs b/crates/xline-client/src/types/auth.rs index ca10dc170..87291ee57 100644 --- a/crates/xline-client/src/types/auth.rs +++ b/crates/xline-client/src/types/auth.rs @@ -8,35 +8,7 @@ pub use xlineapi::{ AuthenticateResponse, Type as PermissionType, }; -/// Request for `AuthRoleGrantPermission` -#[derive(Debug, PartialEq)] -pub struct AuthRoleGrantPermissionRequest { - /// Inner request - pub(crate) inner: xlineapi::AuthRoleGrantPermissionRequest, -} - -impl AuthRoleGrantPermissionRequest { - /// Creates a new `AuthRoleGrantPermissionRequest` - /// - /// `role` is the name of the role to grant permission, - /// `perm` is the permission name to grant. - #[inline] - pub fn new(role: impl Into, perm: Permission) -> Self { - Self { - inner: xlineapi::AuthRoleGrantPermissionRequest { - name: role.into(), - perm: Some(perm.into()), - }, - } - } -} - -impl From for xlineapi::AuthRoleGrantPermissionRequest { - #[inline] - fn from(req: AuthRoleGrantPermissionRequest) -> Self { - req.inner - } -} +use super::range_end::RangeOption; /// Request for `AuthRoleRevokePermission` #[derive(Debug, PartialEq)] @@ -107,6 +79,8 @@ impl From for xlineapi::AuthRoleRevokePermissio pub struct Permission { /// The inner Permission inner: xlineapi::Permission, + /// The range option + range_option: Option, } impl Permission { @@ -114,55 +88,57 @@ impl Permission { /// /// `perm_type` is the permission type, /// `key` is the key to grant with the permission. + /// `range_option` is the range option of how to get `range_end` from key. 
#[inline] #[must_use] - pub fn new(perm_type: PermissionType, key: impl Into>) -> Self { - Self { - inner: xlineapi::Permission { - perm_type: perm_type.into(), - key: key.into(), - ..Default::default() - }, - } + pub fn new( + perm_type: PermissionType, + key: impl Into>, + range_option: Option, + ) -> Self { + Self::from((perm_type, key.into(), range_option)) } +} - /// If set, Xline will return all keys with the matching prefix +impl From for xlineapi::Permission { #[inline] - #[must_use] - pub fn with_prefix(mut self) -> Self { - if self.inner.key.is_empty() { - self.inner.key = vec![0]; - self.inner.range_end = vec![0]; - } else { - self.inner.range_end = KeyRange::get_prefix(&self.inner.key); - } - self + fn from(mut perm: Permission) -> Self { + perm.inner.range_end = perm + .range_option + .unwrap_or_default() + .get_range_end(&mut perm.inner.key); + perm.inner } +} - /// If set, Xline will return all keys that are equal or greater than the given key +impl PartialEq for Permission { #[inline] - #[must_use] - pub fn with_from_key(mut self) -> Self { - if self.inner.key.is_empty() { - self.inner.key = vec![0]; - } - self.inner.range_end = vec![0]; - self + fn eq(&self, other: &Self) -> bool { + self.inner == other.inner && self.range_option == other.range_option } +} - /// `range_end` is the upper bound on the requested range \[key,` range_en`d). - /// If `range_end` is '\0', the range is all keys >= key. 
+impl Eq for Permission {} + +impl From<(PermissionType, Vec, Option)> for Permission { #[inline] - #[must_use] - pub fn with_range_end(mut self, range_end: impl Into>) -> Self { - self.inner.range_end = range_end.into(); - self + fn from( + (perm_type, key, range_option): (PermissionType, Vec, Option), + ) -> Self { + Permission { + inner: xlineapi::Permission { + perm_type: perm_type.into(), + key, + ..Default::default() + }, + range_option, + } } } -impl From for xlineapi::Permission { +impl From<(PermissionType, &str, Option)> for Permission { #[inline] - fn from(perm: Permission) -> Self { - perm.inner + fn from(value: (PermissionType, &str, Option)) -> Self { + Self::from((value.0, value.1.as_bytes().to_vec(), value.2)) } } diff --git a/crates/xline-client/tests/it/auth.rs b/crates/xline-client/tests/it/auth.rs index dad88d0b5..ecd77e5b0 100644 --- a/crates/xline-client/tests/it/auth.rs +++ b/crates/xline-client/tests/it/auth.rs @@ -1,8 +1,9 @@ //! The following tests are originally from `etcd-client` use xline_client::{ error::Result, - types::auth::{ - AuthRoleGrantPermissionRequest, AuthRoleRevokePermissionRequest, Permission, PermissionType, + types::{ + auth::{AuthRoleRevokePermissionRequest, Permission, PermissionType}, + range_end::RangeOption, }, }; @@ -42,38 +43,39 @@ async fn permission_operations_should_success_in_normal_path() -> Result<()> { let client = client.auth_client(); let role1 = "role1"; - let perm1 = Permission::new(PermissionType::Read, "123"); - let perm2 = Permission::new(PermissionType::Write, "abc").with_from_key(); - let perm3 = Permission::new(PermissionType::Readwrite, "hi").with_range_end("hjj"); - let perm4 = Permission::new(PermissionType::Write, "pp").with_prefix(); - let perm5 = Permission::new(PermissionType::Read, vec![0]).with_from_key(); + let perm1 = (PermissionType::Read, "123", None); + let perm2 = (PermissionType::Write, "abc", Some(RangeOption::FromKey)); + let perm3 = ( + PermissionType::Readwrite, + "hi", + 
Some(RangeOption::RangeEnd("hjj".into())), + ); + let perm4 = (PermissionType::Write, "pp", Some(RangeOption::Prefix)); + let perm5 = (PermissionType::Read, vec![0], Some(RangeOption::FromKey)); client.role_add(role1).await?; - client - .role_grant_permission(AuthRoleGrantPermissionRequest::new(role1, perm1.clone())) - .await?; - client - .role_grant_permission(AuthRoleGrantPermissionRequest::new(role1, perm2.clone())) - .await?; - client - .role_grant_permission(AuthRoleGrantPermissionRequest::new(role1, perm3.clone())) - .await?; - client - .role_grant_permission(AuthRoleGrantPermissionRequest::new(role1, perm4.clone())) - .await?; - client - .role_grant_permission(AuthRoleGrantPermissionRequest::new(role1, perm5.clone())) - .await?; + let (p1, p2, p3) = perm1.clone(); + client.role_grant_permission(role1, p1, p2, p3).await?; + let (p1, p2, p3) = perm2.clone(); + client.role_grant_permission(role1, p1, p2, p3).await?; + let (p1, p2, p3) = perm3.clone(); + client.role_grant_permission(role1, p1, p2, p3).await?; + let (p1, p2, p3) = perm4.clone(); + client.role_grant_permission(role1, p1, p2, p3).await?; + let (p1, p2, p3) = perm5.clone(); + client.role_grant_permission(role1, p1, p2, p3).await?; { + // get permissions for role1, and validate the result let resp = client.role_get(role1).await?; let permissions = resp.perm; - assert!(permissions.contains(&perm1.into())); - assert!(permissions.contains(&perm2.into())); - assert!(permissions.contains(&perm3.into())); - assert!(permissions.contains(&perm4.into())); - assert!(permissions.contains(&perm5.into())); + + assert!(permissions.contains(&Permission::from(perm1).into())); + assert!(permissions.contains(&Permission::from(perm2).into())); + assert!(permissions.contains(&Permission::from(perm3).into())); + assert!(permissions.contains(&Permission::from(perm4).into())); + assert!(permissions.contains(&Permission::from(perm5).into())); } // revoke all permission diff --git a/crates/xline-test-utils/src/lib.rs 
b/crates/xline-test-utils/src/lib.rs index 6a293036f..b3135bf24 100644 --- a/crates/xline-test-utils/src/lib.rs +++ b/crates/xline-test-utils/src/lib.rs @@ -14,7 +14,7 @@ use utils::config::{ LogConfig, MetricsConfig, StorageConfig, TlsConfig, TraceConfig, XlineServerConfig, }; use xline::server::XlineServer; -use xline_client::types::auth::{AuthRoleGrantPermissionRequest, Permission, PermissionType}; +use xline_client::types::{auth::PermissionType, range_end::RangeOption}; pub use xline_client::{clients, types, Client, ClientOptions}; /// Cluster @@ -350,10 +350,12 @@ pub async fn set_user( client.user_grant_role(name, role).await?; if !key.is_empty() { client - .role_grant_permission(AuthRoleGrantPermissionRequest::new( + .role_grant_permission( role, - Permission::new(PermissionType::Readwrite, key).with_range_end(range_end), - )) + PermissionType::Readwrite, + key, + Some(RangeOption::RangeEnd(range_end.to_vec())), + ) .await?; } Ok(()) diff --git a/crates/xlinectl/src/command/role/grant_perm.rs b/crates/xlinectl/src/command/role/grant_perm.rs index c4c0ac91d..d81b0f41d 100644 --- a/crates/xlinectl/src/command/role/grant_perm.rs +++ b/crates/xlinectl/src/command/role/grant_perm.rs @@ -1,13 +1,12 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{ - error::Result, - types::auth::{AuthRoleGrantPermissionRequest, Permission}, - Client, -}; +use xline_client::{error::Result, types::range_end::RangeOption, Client}; use xlineapi::Type; use crate::utils::printer::Printer; +/// Temp return type for `grant_perm` command, indicates `(name, PermissionType, key, RangeOption)` +type AuthRoleGrantPermissionRequest = (String, Type, Vec, Option); + /// Definition of `grant_perm` command pub(super) fn command() -> Command { Command::new("grant_perm") @@ -32,34 +31,36 @@ pub(super) fn build_request(matches: &ArgMatches) -> AuthRoleGrantPermissionRequ let prefix = matches.get_flag("prefix"); let from_key = matches.get_flag("from_key"); - let perm_type = match 
perm_type_local.as_str() { - "Read" => Type::Read, - "Write" => Type::Write, - "ReadWrite" => Type::Readwrite, + let perm_type = match perm_type_local.to_lowercase().as_str() { + "read" => Type::Read, + "write" => Type::Write, + "readwrite" => Type::Readwrite, _ => unreachable!("should be checked by clap"), }; - let mut perm = Permission::new(perm_type, key.as_bytes()); - - if let Some(range_end) = range_end { - perm = perm.with_range_end(range_end.as_bytes()); + let range_option = if prefix { + Some(RangeOption::Prefix) + } else if from_key { + Some(RangeOption::FromKey) + } else { + range_end.map(|inner| RangeOption::RangeEnd(inner.as_bytes().to_vec())) }; - if prefix { - perm = perm.with_prefix(); - } - - if from_key { - perm = perm.with_from_key(); - } - - AuthRoleGrantPermissionRequest::new(name, perm) + ( + name.to_owned(), + perm_type, + key.as_bytes().to_vec(), + range_option, + ) } /// Execute the command pub(super) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { let req = build_request(matches); - let resp = client.auth_client().role_grant_permission(req).await?; + let resp = client + .auth_client() + .role_grant_permission(req.0, req.1, req.2, req.3) + .await?; resp.print(); Ok(()) @@ -77,16 +78,20 @@ mod tests { let test_cases = vec![ TestCase::new( vec!["grant_perm", "Admin", "Read", "key1", "key2"], - Some(AuthRoleGrantPermissionRequest::new( - "Admin", - Permission::new(Type::Read, "key1").with_range_end("key2"), + Some(( + "Admin".into(), + Type::Read, + "key1".into(), + Some(RangeOption::RangeEnd("key2".into())), )), ), TestCase::new( vec!["grant_perm", "Admin", "Write", "key3", "--from_key"], - Some(AuthRoleGrantPermissionRequest::new( - "Admin", - Permission::new(Type::Write, "key3").with_from_key(), + Some(( + "Admin".into(), + Type::Write, + "key3".into(), + Some(RangeOption::FromKey), )), ), ]; From 5415aa37deac22efc0b98cc20ecc371aa0a5755f Mon Sep 17 00:00:00 2001 From: bsbds 
<69835502+bsbds@users.noreply.github.com> Date: Mon, 27 May 2024 22:26:54 +0800 Subject: [PATCH 028/322] fix: check leader transfer in lease keep alive Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/curp/src/server/curp_node.rs b/crates/curp/src/server/curp_node.rs index 2f56ee520..b1d3929d1 100644 --- a/crates/curp/src/server/curp_node.rs +++ b/crates/curp/src/server/curp_node.rs @@ -380,6 +380,9 @@ impl, RC: RoleChange> CurpNode { ) -> Result { pin_mut!(req_stream); while let Some(req) = req_stream.next().await { + // NOTE: The leader may shutdown itself in configuration change. + // We must first check this situation. + self.curp.check_leader_transfer()?; if self.curp.is_shutdown() { return Err(CurpError::shutting_down()); } From da51f6cdb5eae72b9da5fdbe22dd543ef822e231 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 05:17:00 +0000 Subject: [PATCH 029/322] chore(deps): bump syn from 2.0.63 to 2.0.65 Bumps [syn](https://github.com/dtolnay/syn) from 2.0.63 to 2.0.65. - [Release notes](https://github.com/dtolnay/syn/releases) - [Commits](https://github.com/dtolnay/syn/compare/2.0.63...2.0.65) --- updated-dependencies: - dependency-name: syn dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 60 +++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ae693d84a..e31982257 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -159,7 +159,7 @@ dependencies = [ "async-trait", "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", "tokio", ] @@ -206,7 +206,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -223,7 +223,7 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -383,7 +383,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -521,7 +521,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -766,7 +766,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.10.0", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -788,7 +788,7 @@ checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ "darling_core 0.20.8", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -842,7 +842,7 @@ dependencies = [ "darling 0.20.8", "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -852,7 +852,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" dependencies = [ "derive_builder_core", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -1088,7 +1088,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -1638,7 +1638,7 @@ dependencies = [ "proc-macro2", "prost-build", "quote", - "syn 2.0.63", + "syn 2.0.65", "tonic-build", ] @@ -1728,7 
+1728,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -2087,7 +2087,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -2160,7 +2160,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -2249,7 +2249,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.63", + "syn 2.0.65", "tempfile", ] @@ -2263,7 +2263,7 @@ dependencies = [ "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -2560,7 +2560,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -2759,7 +2759,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -2781,9 +2781,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.63" +version = "2.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf5be731623ca1a1fb7d8be6f261a3be6d3e2337b8a1f97be944d020c8fcb704" +checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106" dependencies = [ "proc-macro2", "quote", @@ -2826,7 +2826,7 @@ version = "0.1.0" dependencies = [ "assert_cmd", "quote", - "syn 2.0.63", + "syn 2.0.65", "tokio", "workspace-hack", ] @@ -2848,7 +2848,7 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -2933,7 +2933,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -3057,7 +3057,7 
@@ dependencies = [ "proc-macro2", "prost-build", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -3137,7 +3137,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -3388,7 +3388,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", "wasm-bindgen-shared", ] @@ -3422,7 +3422,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3671,7 +3671,7 @@ dependencies = [ "sha2", "smallvec", "syn 1.0.109", - "syn 2.0.63", + "syn 2.0.65", "time", "tokio", "tokio-stream 0.1.15", @@ -3878,7 +3878,7 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] @@ -3898,7 +3898,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.63", + "syn 2.0.65", ] [[package]] From 1c0af79f364f8228a04a11ccd573be093f47be4f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 05:16:49 +0000 Subject: [PATCH 030/322] chore(deps): bump serde_json from 1.0.117 to 1.0.125 Bumps [serde_json](https://github.com/serde-rs/json) from 1.0.117 to 1.0.125. - [Release notes](https://github.com/serde-rs/json/releases) - [Commits](https://github.com/serde-rs/json/compare/v1.0.117...1.0.125) --- updated-dependencies: - dependency-name: serde_json dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 5 +++-- crates/xlinectl/Cargo.toml | 2 +- crates/xlineutl/Cargo.toml | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e31982257..b04b01aae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2565,11 +2565,12 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] diff --git a/crates/xlinectl/Cargo.toml b/crates/xlinectl/Cargo.toml index b90a9dea0..c79c5cb1a 100644 --- a/crates/xlinectl/Cargo.toml +++ b/crates/xlinectl/Cargo.toml @@ -15,7 +15,7 @@ anyhow = "1.0" clap = "4" regex = "1.10.5" serde = { version = "1.0.204", features = ["derive"] } -serde_json = "1.0.117" +serde_json = "1.0.125" shlex = "1.3.0" tokio = "1" tonic = { version = "0.5.0", package = "madsim-tonic" } diff --git a/crates/xlineutl/Cargo.toml b/crates/xlineutl/Cargo.toml index 891040207..d68f46dd8 100644 --- a/crates/xlineutl/Cargo.toml +++ b/crates/xlineutl/Cargo.toml @@ -17,7 +17,7 @@ clap = "4" crc32fast = "1.4.0" engine = { path = "../engine" } serde = { version = "1.0.204", features = ["derive"] } -serde_json = "1.0.117" +serde_json = "1.0.125" tempfile = "3.10.1" tokio = "1" utils = { path = "../utils" } From 5ee8166f84fa3cd3114ff24622a675be15ac7ec9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 05:16:38 +0000 Subject: [PATCH 031/322] chore(deps): bump crc32fast from 1.4.0 to 1.4.2 Bumps [crc32fast](https://github.com/srijs/rust-crc32fast) from 1.4.0 to 1.4.2. 
- [Commits](https://github.com/srijs/rust-crc32fast/compare/v1.4.0...v1.4.2) --- updated-dependencies: - dependency-name: crc32fast dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- crates/xline/Cargo.toml | 2 +- crates/xlineutl/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b04b01aae..cab788fcd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -590,9 +590,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ "cfg-if", ] diff --git a/crates/xline/Cargo.toml b/crates/xline/Cargo.toml index f6031c0af..de32c7c41 100644 --- a/crates/xline/Cargo.toml +++ b/crates/xline/Cargo.toml @@ -19,7 +19,7 @@ axum = "0.7.0" bytes = "1.4.0" clap = { version = "4", features = ["derive"] } clippy-utilities = "0.2.0" -crc32fast = "1.4.0" +crc32fast = "1.4.2" crossbeam-skiplist = "0.1.1" curp = { path = "../curp", version = "0.1.0", features = ["client-metrics"] } curp-external-api = { path = "../curp-external-api" } diff --git a/crates/xlineutl/Cargo.toml b/crates/xlineutl/Cargo.toml index d68f46dd8..5f11475d1 100644 --- a/crates/xlineutl/Cargo.toml +++ b/crates/xlineutl/Cargo.toml @@ -14,7 +14,7 @@ keywords = ["Client", "CommandLine"] [dependencies] anyhow = "1.0" clap = "4" -crc32fast = "1.4.0" +crc32fast = "1.4.2" engine = { path = "../engine" } serde = { version = "1.0.204", features = ["derive"] } serde_json = "1.0.125" From 40e55f2fe3cbd648fc170c5b29748f08db292b9c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 7 Aug 2024 10:06:02 +0800 Subject: [PATCH 032/322] fix: update madsim to fix stream early close issue Ref: 
https://github.com/madsim-rs/madsim/pull/218 Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- Cargo.lock | 132 ++++++++++++++++++++++++++++---------- Cargo.toml | 8 +-- workspace-hack/Cargo.toml | 4 +- 3 files changed, 105 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c169ad2af..6bcadb425 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -672,9 +672,9 @@ dependencies = [ "futures", "indexmap 2.2.6", "itertools 0.13.0", - "madsim", - "madsim-tokio", - "madsim-tonic", + "madsim 0.2.30", + "madsim-tokio 0.2.28", + "madsim-tonic 0.4.2+0.10.0", "madsim-tonic-build", "mockall", "once_cell", @@ -721,7 +721,7 @@ dependencies = [ "curp-external-api", "engine", "itertools 0.13.0", - "madsim-tokio", + "madsim-tokio 0.2.28", "prost", "serde", "thiserror", @@ -928,7 +928,7 @@ dependencies = [ "bincode", "bytes", "clippy-utilities", - "madsim-tokio", + "madsim-tokio 0.2.28", "opentelemetry 0.21.0", "parking_lot", "rocksdb", @@ -1559,8 +1559,8 @@ dependencies = [ [[package]] name = "madsim" -version = "0.2.27" -source = "git+https://github.com/Phoenix500526/madsim.git?branch=update-tonic#4df254ae43fe7921a8403873460005379ccb8247" +version = "0.2.30" +source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" dependencies = [ "ahash", "async-channel", @@ -1572,7 +1572,7 @@ dependencies = [ "futures-util", "lazy_static", "libc", - "madsim-macros", + "madsim-macros 0.2.12 (git+https://github.com/Phoenix500526/madsim.git?branch=update-tonic)", "naive-timer", "panic-message", "rand", @@ -1587,10 +1587,51 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "madsim" +version = "0.2.30" +source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" +dependencies = [ + "ahash", + "async-channel", + "async-stream", + "async-task", + "bincode", + "bytes", + "downcast-rs", + "futures-util", + "lazy_static", + "libc", + 
"madsim-macros 0.2.12 (git+https://github.com/bsbds/madsim.git?branch=fix-client-stream)", + "naive-timer", + "panic-message", + "rand", + "rand_xoshiro", + "rustversion", + "serde", + "spin", + "tokio", + "tokio-util", + "toml", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "madsim-macros" +version = "0.2.12" +source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" +dependencies = [ + "darling 0.14.4", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "madsim-macros" version = "0.2.12" -source = "git+https://github.com/Phoenix500526/madsim.git?branch=update-tonic#4df254ae43fe7921a8403873460005379ccb8247" +source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" dependencies = [ "darling 0.14.4", "proc-macro2", @@ -1600,23 +1641,48 @@ dependencies = [ [[package]] name = "madsim-tokio" -version = "0.2.25" -source = "git+https://github.com/Phoenix500526/madsim.git?branch=update-tonic#4df254ae43fe7921a8403873460005379ccb8247" +version = "0.2.28" +source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" +dependencies = [ + "madsim 0.2.27", + "spin", + "tokio", +] + +[[package]] +name = "madsim-tokio" +version = "0.2.28" +source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" dependencies = [ - "madsim", + "madsim 0.2.30", "spin", "tokio", ] [[package]] name = "madsim-tonic" -version = "0.4.2+0.11.0" -source = "git+https://github.com/Phoenix500526/madsim.git?branch=update-tonic#4df254ae43fe7921a8403873460005379ccb8247" +version = "0.4.2+0.10.0" +source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" +dependencies = [ + "async-stream", + "chrono", + "futures-util", + "madsim 0.2.30", + "tokio", + "tonic", + "tower", + "tracing", +] + 
+[[package]] +name = "madsim-tonic" +version = "0.4.2+0.10.0" +source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" dependencies = [ "async-stream", "chrono", "futures-util", - "madsim", + "madsim 0.2.27", "tokio", "tonic", "tower", @@ -1625,8 +1691,8 @@ dependencies = [ [[package]] name = "madsim-tonic-build" -version = "0.4.3+0.11.0" -source = "git+https://github.com/Phoenix500526/madsim.git?branch=update-tonic#4df254ae43fe7921a8403873460005379ccb8247" +version = "0.4.3+0.10.0" +source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" dependencies = [ "prettyplease", "proc-macro2", @@ -2747,9 +2813,9 @@ dependencies = [ "engine", "futures", "itertools 0.13.0", - "madsim", - "madsim-tokio", - "madsim-tonic", + "madsim 0.2.30", + "madsim-tokio 0.2.28", + "madsim-tonic 0.4.2+0.10.0", "madsim-tonic-build", "parking_lot", "prost", @@ -3075,7 +3141,7 @@ version = "0.1.12" source = "git+https://github.com/madsim-rs/tokio.git?rev=ab251ad#ab251ad1fae8e16d9a1df74e301dbf3ed9d4d3af" dependencies = [ "futures-core", - "madsim-tokio", + "madsim-tokio 0.2.28", "pin-project-lite", ] @@ -3420,8 +3486,8 @@ dependencies = [ "event-listener", "futures", "getset", - "madsim-tokio", - "madsim-tonic", + "madsim-tokio 0.2.28", + "madsim-tonic 0.4.2+0.10.0", "opentelemetry 0.22.0", "opentelemetry-jaeger", "opentelemetry-otlp", @@ -3785,8 +3851,8 @@ dependencies = [ "itertools 0.12.1", "libc", "log", - "madsim-tokio", - "madsim-tonic", + "madsim-tokio 0.2.25", + "madsim-tonic 0.4.2+0.11.0", "memchr", "num-traits", "opentelemetry_sdk 0.22.1", @@ -3852,8 +3918,8 @@ dependencies = [ "itertools 0.13.0", "jsonwebtoken", "log", - "madsim-tokio", - "madsim-tonic", + "madsim-tokio 0.2.28", + "madsim-tonic 0.4.2+0.10.0", "madsim-tonic-build", "merged_range", "mockall", @@ -3903,8 +3969,8 @@ dependencies = [ "futures", "getrandom", "http", - "madsim-tokio", - "madsim-tonic", + 
"madsim-tokio 0.2.28", + "madsim-tonic 0.4.2+0.10.0", "rand", "test-macros", "thiserror", @@ -3920,8 +3986,8 @@ name = "xline-test-utils" version = "0.1.0" dependencies = [ "futures", - "madsim-tokio", - "madsim-tonic", + "madsim-tokio 0.2.28", + "madsim-tonic 0.4.2+0.10.0", "rand", "utils", "workspace-hack", @@ -3937,7 +4003,7 @@ dependencies = [ "curp", "curp-external-api", "itertools 0.13.0", - "madsim-tonic", + "madsim-tonic 0.4.2+0.10.0", "madsim-tonic-build", "prost", "serde", @@ -3954,7 +4020,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "madsim-tonic", + "madsim-tonic 0.4.2+0.10.0", "regex", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index e0220e105..cebe177e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ ignored = ["prost", "workspace-hack"] [patch.crates-io] # This branch update the tonic version for madsim. We should switch to the original etcd-client crate when new version release. -madsim = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic" } -madsim-tonic = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic" } -madsim-tonic-build = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic" } -madsim-tokio = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic" } +madsim = { git = "https://github.com/bsbds/madsim.git", branch = "fix-client-stream" } +madsim-tonic = { git = "https://github.com/bsbds/madsim.git", branch = "fix-client-stream" } +madsim-tonic-build = { git = "https://github.com/bsbds/madsim.git", branch = "fix-client-stream" } +madsim-tokio = { git = "https://github.com/bsbds/madsim.git", branch = "fix-client-stream" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 6b4d31d24..7eec178ae 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -24,8 +24,8 @@ futures-util = { version = "0.3", features = ["channel", "io", "sink"] } getrandom = { version = 
"0.2", default-features = false, features = ["js", "rdrand", "std"] } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } -madsim-tokio = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic", default-features = false, features = ["fs", "io-util", "macros", "net", "rt", "rt-multi-thread", "signal", "sync", "time"] } -madsim-tonic = { git = "https://github.com/Phoenix500526/madsim.git", branch = "update-tonic", default-features = false, features = ["tls"] } +madsim-tokio = { git = "https://github.com/bsbds/madsim.git", branch = "fix-client-stream", default-features = false, features = ["fs", "io-util", "macros", "net", "rt", "rt-multi-thread", "signal", "sync", "time"] } +madsim-tonic = { git = "https://github.com/bsbds/madsim.git", branch = "fix-client-stream", default-features = false, features = ["tls"] } memchr = { version = "2" } num-traits = { version = "0.2", default-features = false, features = ["i128", "std"] } opentelemetry_sdk = { version = "0.22", features = ["metrics", "rt-tokio"] } From 3577254c65670e0860e4cf0f50bf9d2c9e38b8a9 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 19 Aug 2024 15:40:18 +0800 Subject: [PATCH 033/322] fix: Cargo.lock Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- Cargo.lock | 114 +++++++++++------------------------------------------ 1 file changed, 24 insertions(+), 90 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6bcadb425..7240258cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -672,9 +672,9 @@ dependencies = [ "futures", "indexmap 2.2.6", "itertools 0.13.0", - "madsim 0.2.30", - "madsim-tokio 0.2.28", - "madsim-tonic 0.4.2+0.10.0", + "madsim", + "madsim-tokio", + "madsim-tonic", "madsim-tonic-build", "mockall", "once_cell", @@ -721,7 +721,7 @@ dependencies = [ "curp-external-api", "engine", "itertools 0.13.0", - "madsim-tokio 0.2.28", + "madsim-tokio", 
"prost", "serde", "thiserror", @@ -928,7 +928,7 @@ dependencies = [ "bincode", "bytes", "clippy-utilities", - "madsim-tokio 0.2.28", + "madsim-tokio", "opentelemetry 0.21.0", "parking_lot", "rocksdb", @@ -1572,37 +1572,7 @@ dependencies = [ "futures-util", "lazy_static", "libc", - "madsim-macros 0.2.12 (git+https://github.com/Phoenix500526/madsim.git?branch=update-tonic)", - "naive-timer", - "panic-message", - "rand", - "rand_xoshiro", - "rustversion", - "serde", - "spin", - "tokio", - "tokio-util", - "toml", - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "madsim" -version = "0.2.30" -source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" -dependencies = [ - "ahash", - "async-channel", - "async-stream", - "async-task", - "bincode", - "bytes", - "downcast-rs", - "futures-util", - "lazy_static", - "libc", - "madsim-macros 0.2.12 (git+https://github.com/bsbds/madsim.git?branch=fix-client-stream)", + "madsim-macros", "naive-timer", "panic-message", "rand", @@ -1628,52 +1598,16 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "madsim-macros" -version = "0.2.12" -source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" -dependencies = [ - "darling 0.14.4", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "madsim-tokio" version = "0.2.28" source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" dependencies = [ - "madsim 0.2.27", + "madsim", "spin", "tokio", ] -[[package]] -name = "madsim-tokio" -version = "0.2.28" -source = "git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" -dependencies = [ - "madsim 0.2.30", - "spin", - "tokio", -] - -[[package]] -name = "madsim-tonic" -version = "0.4.2+0.10.0" -source = 
"git+https://github.com/bsbds/madsim.git?branch=fix-client-stream#831b320ed47a1c202646fd25e879a0ad61cd374d" -dependencies = [ - "async-stream", - "chrono", - "futures-util", - "madsim 0.2.30", - "tokio", - "tonic", - "tower", - "tracing", -] - [[package]] name = "madsim-tonic" version = "0.4.2+0.10.0" @@ -1682,7 +1616,7 @@ dependencies = [ "async-stream", "chrono", "futures-util", - "madsim 0.2.27", + "madsim", "tokio", "tonic", "tower", @@ -2813,9 +2747,9 @@ dependencies = [ "engine", "futures", "itertools 0.13.0", - "madsim 0.2.30", - "madsim-tokio 0.2.28", - "madsim-tonic 0.4.2+0.10.0", + "madsim", + "madsim-tokio", + "madsim-tonic", "madsim-tonic-build", "parking_lot", "prost", @@ -3141,7 +3075,7 @@ version = "0.1.12" source = "git+https://github.com/madsim-rs/tokio.git?rev=ab251ad#ab251ad1fae8e16d9a1df74e301dbf3ed9d4d3af" dependencies = [ "futures-core", - "madsim-tokio 0.2.28", + "madsim-tokio", "pin-project-lite", ] @@ -3486,8 +3420,8 @@ dependencies = [ "event-listener", "futures", "getset", - "madsim-tokio 0.2.28", - "madsim-tonic 0.4.2+0.10.0", + "madsim-tokio", + "madsim-tonic", "opentelemetry 0.22.0", "opentelemetry-jaeger", "opentelemetry-otlp", @@ -3851,8 +3785,8 @@ dependencies = [ "itertools 0.12.1", "libc", "log", - "madsim-tokio 0.2.25", - "madsim-tonic 0.4.2+0.11.0", + "madsim-tokio", + "madsim-tonic", "memchr", "num-traits", "opentelemetry_sdk 0.22.1", @@ -3918,8 +3852,8 @@ dependencies = [ "itertools 0.13.0", "jsonwebtoken", "log", - "madsim-tokio 0.2.28", - "madsim-tonic 0.4.2+0.10.0", + "madsim-tokio", + "madsim-tonic", "madsim-tonic-build", "merged_range", "mockall", @@ -3969,8 +3903,8 @@ dependencies = [ "futures", "getrandom", "http", - "madsim-tokio 0.2.28", - "madsim-tonic 0.4.2+0.10.0", + "madsim-tokio", + "madsim-tonic", "rand", "test-macros", "thiserror", @@ -3986,8 +3920,8 @@ name = "xline-test-utils" version = "0.1.0" dependencies = [ "futures", - "madsim-tokio 0.2.28", - "madsim-tonic 0.4.2+0.10.0", + "madsim-tokio", + 
"madsim-tonic", "rand", "utils", "workspace-hack", @@ -4003,7 +3937,7 @@ dependencies = [ "curp", "curp-external-api", "itertools 0.13.0", - "madsim-tonic 0.4.2+0.10.0", + "madsim-tonic", "madsim-tonic-build", "prost", "serde", @@ -4020,7 +3954,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "madsim-tonic 0.4.2+0.10.0", + "madsim-tonic", "regex", "serde", "serde_json", From 4c99c46cd9e466cb57a67213004a2863960f6ce6 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 23 Aug 2024 09:05:13 +0800 Subject: [PATCH 034/322] fix: madsim tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> fix: compaction in madsim Run the compaction task synchronously in madsim, please refer to `compact_bg_task` for the madsim compaction code Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> fix: not waiting for client id in madsim tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> fix: set leader in simulation xline group Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 12 ++++- crates/simulation/src/xline_group.rs | 2 +- .../tests/it/curp/server_recovery.rs | 18 +++----- crates/xline/src/storage/kv_store.rs | 46 +++++++++++++------ 4 files changed, 51 insertions(+), 27 deletions(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 378b432d8..739fd9674 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -451,15 +451,23 @@ impl ClientBuilder { impl ClientApi + Send + Sync + 'static, Arc, ), - tonic::transport::Error, + tonic::Status, > { - let state = Arc::new(self.init_state_builder().build().await?); + let state = Arc::new( + self.init_state_builder() + .build() + .await + .map_err(|e| tonic::Status::internal(e.to_string()))?, + ); + let client = Retry::new( Unary::new(Arc::clone(&state), self.init_unary_config()), self.init_retry_config(), 
Some(self.spawn_bg_tasks(Arc::clone(&state))), ); let client_id = state.clone_client_id(); + self.wait_for_client_id(state).await?; + Ok((client, client_id)) } } diff --git a/crates/simulation/src/xline_group.rs b/crates/simulation/src/xline_group.rs index eb97322d2..0f61892b5 100644 --- a/crates/simulation/src/xline_group.rs +++ b/crates/simulation/src/xline_group.rs @@ -55,7 +55,7 @@ impl XlineGroup { vec!["0.0.0.0:2379".to_owned()], vec![format!("192.168.1.{}:2379", i + 1)], all.clone(), - false, + i == 0, CurpConfig::default(), ClientConfig::default(), ServerTimeout::default(), diff --git a/crates/simulation/tests/it/curp/server_recovery.rs b/crates/simulation/tests/it/curp/server_recovery.rs index e14abd406..7e8a88ccf 100644 --- a/crates/simulation/tests/it/curp/server_recovery.rs +++ b/crates/simulation/tests/it/curp/server_recovery.rs @@ -116,9 +116,15 @@ async fn leader_and_follower_both_crash_and_recovery() { let follower = *group.nodes.keys().find(|&id| id != &leader).unwrap(); group.crash(follower).await; + let _wait_up = client + .propose(TestCommand::new_get(vec![0]), true) + .await + .unwrap() + .unwrap(); + assert_eq!( client - .propose(TestCommand::new_put(vec![0], 0), true) + .propose(TestCommand::new_put(vec![0], 0), false) .await .unwrap() .unwrap() @@ -126,16 +132,6 @@ async fn leader_and_follower_both_crash_and_recovery() { .values, Vec::::new(), ); - assert_eq!( - client - .propose(TestCommand::new_get(vec![0]), true) - .await - .unwrap() - .unwrap() - .0 - .values, - vec![0] - ); group.crash(leader).await; diff --git a/crates/xline/src/storage/kv_store.rs b/crates/xline/src/storage/kv_store.rs index 44a0cac04..7b92043d9 100644 --- a/crates/xline/src/storage/kv_store.rs +++ b/crates/xline/src/storage/kv_store.rs @@ -1124,23 +1124,43 @@ impl KvStore { let ops = vec![WriteOp::PutScheduledCompactRevision(revision)]; // TODO: Remove the physical process logic here. 
It's better to move into the // KvServer - #[cfg_attr(madsim, allow(unused))] - let (event, listener) = if req.physical { - let event = Arc::new(event_listener::Event::new()); - let listener = event.listen(); - (Some(event), Some(listener)) - } else { - (None, None) - }; // TODO: sync compaction task - if let Err(e) = self.compact_task_tx.send((revision, event)) { - panic!("the compactor exited unexpectedly: {e:?}"); - } // FIXME: madsim is single threaded, we cannot use synchronous wait here #[cfg(not(madsim))] - if let Some(listener) = listener { - listener.wait(); + { + let (event, listener) = if req.physical { + let event = Arc::new(event_listener::Event::new()); + let listener = event.listen(); + (Some(event), Some(listener)) + } else { + (None, None) + }; + if let Err(e) = self.compact_task_tx.send((revision, event)) { + panic!("the compactor exited unexpectedly: {e:?}"); + } + if let Some(listener) = listener { + listener.wait(); + } + } + #[cfg(madsim)] + { + let index = self.index(); + let target_revisions = index + .compact(revision) + .into_iter() + .map(|key_rev| key_rev.as_revision().encode_to_vec()) + .collect::>>(); + // Given that the Xline uses a lim-tree database with smaller write amplification as the storage backend , does using progressive compaction really good at improving performance? 
+ for revision_chunk in target_revisions.chunks(1000) { + if let Err(e) = self.compact(revision_chunk) { + panic!("failed to compact revision chunk {revision_chunk:?} due to {e}"); + } + } + if let Err(e) = self.compact_finished(revision) { + panic!("failed to set finished compact revision {revision:?} due to {e}"); + } } + self.inner.db.write_ops(ops)?; let resp = to_execute From fdb5bb29f089586f140b8a662c17d8903ad0d6e9 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 8 Jul 2024 19:12:43 +0800 Subject: [PATCH 035/322] test: rewrite tests for curp client Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/tests.rs | 204 ++++++++++++++------------------ 1 file changed, 90 insertions(+), 114 deletions(-) diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 32c177183..9db79c303 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -1,10 +1,10 @@ use std::{ collections::HashMap, - sync::{atomic::AtomicU64, Arc}, - time::Duration, + sync::{atomic::AtomicU64, Arc, Mutex}, + time::{Duration, Instant}, }; -use curp_test_utils::test_cmd::{TestCommand, TestCommandResult}; +use curp_test_utils::test_cmd::{LogIndexResult, TestCommand, TestCommandResult}; use futures::{future::BoxFuture, Stream}; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; @@ -19,7 +19,10 @@ use super::{ unary::{Unary, UnaryConfig}, }; use crate::{ - client::ClientApi, + client::{ + retry::{Retry, RetryConfig}, + ClientApi, + }, members::ServerId, rpc::{ connect::{ConnectApi, MockConnectApi}, @@ -257,7 +260,8 @@ async fn test_unary_fetch_clusters_linearizable_failed() { }); let unary = init_unary_client(connects, None, None, 0, 0, None); let res = unary.fetch_cluster(true).await.unwrap_err(); - // only server(0, 1)'s responses are valid, less than majority quorum(3), got a mocked RpcTransport to retry + // only server(0, 1)'s responses are valid, less than 
majority quorum(3), got a + // mocked RpcTransport to retry assert_eq!(res, CurpError::RpcTransport(())); } @@ -276,79 +280,71 @@ fn build_synced_response() -> OpResponse { // TODO: rewrite this tests #[cfg(ignore)] +fn build_empty_response() -> OpResponse { + OpResponse { op: None } +} + #[traced_test] #[tokio::test] async fn test_unary_propose_fast_path_works() { let connects = init_mocked_connects(5, |id, conn| { - conn.expect_propose() + conn.expect_propose_stream() .return_once(move |_req, _token, _timeout| { - let resp = match id { - 0 => ProposeResponse::new_result::( - &Ok(TestCommandResult::default()), - false, - ), - 1 | 2 | 3 => ProposeResponse::new_empty(), - 4 => return Err(CurpError::key_conflict()), - _ => unreachable!("there are only 5 nodes"), + assert_eq!(id, 0, "followers should not receive propose"); + let resp = async_stream::stream! { + yield Ok(build_propose_response(false)); + yield Ok(build_synced_response()); }; - Ok(tonic::Response::new(resp)) - }); - conn.expect_wait_synced() - .return_once(move |_req, _timeout| { - assert!(id == 0, "wait synced should send to leader"); - std::thread::sleep(Duration::from_millis(100)); - Ok(tonic::Response::new(WaitSyncedResponse::new_from_result::< - TestCommand, - >( - Ok(TestCommandResult::default()), - Some(Ok(1.into())), - ))) + Ok(tonic::Response::new(Box::new(resp))) }); + conn.expect_record().return_once(move |_req, _timeout| { + let resp = match id { + 0 => unreachable!("leader should not receive record request"), + 1 | 2 | 3 => RecordResponse { conflict: false }, + 4 => RecordResponse { conflict: true }, + _ => unreachable!("there are only 5 nodes"), + }; + Ok(tonic::Response::new(resp)) + }); }); let unary = init_unary_client(connects, None, Some(0), 1, 0, None); let res = unary - .propose(&TestCommand::default(), None, true) + .propose(&TestCommand::new_put(vec![1], 1), None, true) .await .unwrap() .unwrap(); assert_eq!(res, (TestCommandResult::default(), None)); } -// TODO: rewrite this 
tests -#[cfg(ignore)] #[traced_test] #[tokio::test] async fn test_unary_propose_slow_path_works() { let connects = init_mocked_connects(5, |id, conn| { - conn.expect_propose() + conn.expect_propose_stream() .return_once(move |_req, _token, _timeout| { - let resp = match id { - 0 => ProposeResponse::new_result::( - &Ok(TestCommandResult::default()), - false, - ), - 1 | 2 | 3 => ProposeResponse::new_empty(), - 4 => return Err(CurpError::key_conflict()), - _ => unreachable!("there are only 5 nodes"), + assert_eq!(id, 0, "followers should not receive propose"); + let resp = async_stream::stream! { + yield Ok(build_propose_response(false)); + tokio::time::sleep(Duration::from_millis(100)).await; + yield Ok(build_synced_response()); }; - Ok(tonic::Response::new(resp)) - }); - conn.expect_wait_synced() - .return_once(move |_req, _timeout| { - assert!(id == 0, "wait synced should send to leader"); - std::thread::sleep(Duration::from_millis(100)); - Ok(tonic::Response::new(WaitSyncedResponse::new_from_result::< - TestCommand, - >( - Ok(TestCommandResult::default()), - Some(Ok(1.into())), - ))) + Ok(tonic::Response::new(Box::new(resp))) }); + conn.expect_record().return_once(move |_req, _timeout| { + let resp = match id { + 0 => unreachable!("leader should not receive record request"), + 1 | 2 | 3 => RecordResponse { conflict: false }, + 4 => RecordResponse { conflict: true }, + _ => unreachable!("there are only 5 nodes"), + }; + Ok(tonic::Response::new(resp)) + }); }); + let unary = init_unary_client(connects, None, Some(0), 1, 0, None); let start_at = Instant::now(); let res = unary - .propose(&TestCommand::default(), None, false) + .propose(&TestCommand::new_put(vec![1], 1), None, false) .await .unwrap() .unwrap(); @@ -362,42 +358,36 @@ async fn test_unary_propose_slow_path_works() { ); } -// TODO: rewrite this tests -#[cfg(ignore)] #[traced_test] #[tokio::test] async fn test_unary_propose_fast_path_fallback_slow_path() { + // record how many times `handle_propose` was 
invoked. let connects = init_mocked_connects(5, |id, conn| { - conn.expect_propose() + conn.expect_propose_stream() .return_once(move |_req, _token, _timeout| { - // insufficient quorum to force slow path. - let resp = match id { - 0 => ProposeResponse::new_result::( - &Ok(TestCommandResult::default()), - false, - ), - 1 | 2 => ProposeResponse::new_empty(), - 3 | 4 => return Err(CurpError::key_conflict()), - _ => unreachable!("there are only 5 nodes"), + assert_eq!(id, 0, "followers should not receive propose"); + let resp = async_stream::stream! { + yield Ok(build_propose_response(false)); + tokio::time::sleep(Duration::from_millis(100)).await; + yield Ok(build_synced_response()); }; - Ok(tonic::Response::new(resp)) - }); - conn.expect_wait_synced() - .return_once(move |_req, _timeout| { - assert!(id == 0, "wait synced should send to leader"); - std::thread::sleep(Duration::from_millis(100)); - Ok(tonic::Response::new(WaitSyncedResponse::new_from_result::< - TestCommand, - >( - Ok(TestCommandResult::default()), - Some(Ok(1.into())), - ))) + Ok(tonic::Response::new(Box::new(resp))) }); + // insufficient quorum + conn.expect_record().return_once(move |_req, _timeout| { + let resp = match id { + 0 => unreachable!("leader should not receive record request"), + 1 | 2 => RecordResponse { conflict: false }, + 3 | 4 => RecordResponse { conflict: true }, + _ => unreachable!("there are only 5 nodes"), + }; + Ok(tonic::Response::new(resp)) + }); }); let unary = init_unary_client(connects, None, Some(0), 1, 0, None); let start_at = Instant::now(); let res = unary - .propose(&TestCommand::default(), None, true) + .propose(&TestCommand::new_put(vec![1], 1), None, true) .await .unwrap() .unwrap(); @@ -405,14 +395,13 @@ async fn test_unary_propose_fast_path_fallback_slow_path() { start_at.elapsed() > Duration::from_millis(100), "slow round takes at least 100ms" ); + // indicate that we actually run out of fast round assert_eq!( res, (TestCommandResult::default(), 
Some(LogIndexResult::from(1))) ); } -// TODO: rewrite this tests -#[cfg(ignore)] #[traced_test] #[tokio::test] async fn test_unary_propose_return_early_err() { @@ -428,26 +417,22 @@ async fn test_unary_propose_return_early_err() { assert!(early_err.should_abort_fast_round()); // record how many times rpc was invoked. let counter = Arc::new(Mutex::new(0)); - let connects = init_mocked_connects(5, |id, conn| { + let connects = init_mocked_connects(5, |_id, conn| { let err = early_err.clone(); let counter_c = Arc::clone(&counter); - conn.expect_propose() + conn.expect_propose_stream() .return_once(move |_req, _token, _timeout| { - counter_c.lock().unwrap().add_assign(1); + *counter_c.lock().unwrap() += 1; Err(err) }); + let err = early_err.clone(); - let counter_c = Arc::clone(&counter); - conn.expect_wait_synced() - .return_once(move |_req, _timeout| { - assert!(id == 0, "wait synced should send to leader"); - counter_c.lock().unwrap().add_assign(1); - Err(err) - }); + conn.expect_record() + .return_once(move |_req, _timeout| Err(err)); }); let unary = init_unary_client(connects, None, Some(0), 1, 0, None); let err = unary - .propose(&TestCommand::default(), None, true) + .propose(&TestCommand::new_put(vec![1], 1), None, true) .await .unwrap_err(); assert_eq!(err, early_err); @@ -457,8 +442,6 @@ async fn test_unary_propose_return_early_err() { // Tests for retry layer -// TODO: rewrite this tests -#[cfg(ignore)] #[traced_test] #[tokio::test] async fn test_retry_propose_return_no_retry_error() { @@ -471,22 +454,18 @@ async fn test_retry_propose_return_no_retry_error() { ] { // record how many times rpc was invoked. 
let counter = Arc::new(Mutex::new(0)); - let connects = init_mocked_connects(5, |id, conn| { + let connects = init_mocked_connects(5, |_id, conn| { let err = early_err.clone(); let counter_c = Arc::clone(&counter); - conn.expect_propose() + conn.expect_propose_stream() .return_once(move |_req, _token, _timeout| { - counter_c.lock().unwrap().add_assign(1); + *counter_c.lock().unwrap() += 1; Err(err) }); + let err = early_err.clone(); - let counter_c = Arc::clone(&counter); - conn.expect_wait_synced() - .return_once(move |_req, _timeout| { - assert!(id == 0, "wait synced should send to leader"); - counter_c.lock().unwrap().add_assign(1); - Err(err) - }); + conn.expect_record() + .return_once(move |_req, _timeout| Err(err)); }); let unary = init_unary_client(connects, None, Some(0), 1, 0, None); let retry = Retry::new( @@ -495,27 +474,22 @@ async fn test_retry_propose_return_no_retry_error() { None, ); let err = retry - .propose(&TestCommand::default(), None, false) + .propose(&TestCommand::new_put(vec![1], 1), None, false) .await .unwrap_err(); assert_eq!(err.message(), tonic::Status::from(early_err).message()); - // fast path + slow path = 2 - assert_eq!(*counter.lock().unwrap(), 2); + assert_eq!(*counter.lock().unwrap(), 1); } } -// TODO: rewrite this tests -#[cfg(ignore)] #[traced_test] #[tokio::test] async fn test_retry_propose_return_retry_error() { for early_err in [ - CurpError::key_conflict(), CurpError::RpcTransport(()), CurpError::internal("No reason"), ] { let connects = init_mocked_connects(5, |id, conn| { - let err = early_err.clone(); conn.expect_fetch_cluster() .returning(move |_req, _timeout| { Ok(tonic::Response::new(FetchClusterResponse { @@ -532,14 +506,16 @@ async fn test_retry_propose_return_retry_error() { cluster_version: 1, })) }); - conn.expect_propose() - .returning(move |_req, _token, _timeout| Err(err.clone())); if id == 0 { let err = early_err.clone(); - conn.expect_wait_synced() - .times(5) // wait synced should be retried in 5 times on 
leader - .returning(move |_req, _timeout| Err(err.clone())); + conn.expect_propose_stream() + .times(5) // propose should be retried in 5 times on leader + .returning(move |_req, _token, _timeout| Err(err.clone())); } + + let err = early_err.clone(); + conn.expect_record() + .returning(move |_req, _timeout| Err(err.clone())); }); let unary = init_unary_client(connects, None, Some(0), 1, 0, None); let retry = Retry::new( @@ -548,7 +524,7 @@ async fn test_retry_propose_return_retry_error() { None, ); let err = retry - .propose(&TestCommand::default(), None, false) + .propose(&TestCommand::new_put(vec![1], 1), None, false) .await .unwrap_err(); assert!(err.message().contains("request timeout")); From debbdabe75564412a701ad476f9be6258b0ab4e0 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 15 Aug 2024 15:07:22 +0800 Subject: [PATCH 036/322] fix: exe_exactly_once_on_leader will only test on leader Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/tests/it/common/curp_group.rs | 4 ++++ crates/curp/tests/it/server.rs | 10 ++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index e2dbaab8d..e95694aa8 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -318,6 +318,10 @@ impl CurpGroup { &self.nodes[id] } + pub fn get_node_mut(&mut self, id: &ServerId) -> &mut CurpNode { + self.nodes.get_mut(id).unwrap() + } + pub async fn new_client(&self) -> impl ClientApi { let addrs = self.all_addrs().cloned().collect(); ClientBuilder::new(ClientConfig::default(), true) diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index 04c318e8f..ebd400373 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -93,14 +93,12 @@ async fn exe_exactly_once_on_leader() { let er = client.propose(&cmd, None, 
true).await.unwrap().unwrap().0; assert_eq!(er, TestCommandResult::new(vec![], vec![])); + let leader = group.get_leader().await.0; { - let mut exe_futs = group - .exe_rxs() - .map(|rx| rx.recv()) - .collect::>(); - let (cmd1, er) = exe_futs.next().await.unwrap().unwrap(); + let exec_rx = &mut group.get_node_mut(&leader).exe_rx; + let (cmd1, er) = exec_rx.recv().await.unwrap(); assert!( - tokio::time::timeout(Duration::from_millis(100), exe_futs.next()) + tokio::time::timeout(Duration::from_millis(100), exec_rx.recv()) .await .is_err() ); From ba555d5af32f0e15fbe104a0cad0d9c455e0e960 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 15 Aug 2024 15:10:02 +0800 Subject: [PATCH 037/322] fix: concurrent_cmd_order_should_have_correct_revision timeout due to serial execution Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/tests/it/server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index ebd400373..9eeb5878a 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -260,7 +260,7 @@ async fn concurrent_cmd_order_should_have_correct_revision() { let sample_range = 1..=100; for i in sample_range.clone() { - let rand_dur = Duration::from_millis(thread_rng().gen_range(0..500).numeric_cast()); + let rand_dur = Duration::from_millis(thread_rng().gen_range(0..50).numeric_cast()); let _er = client .propose( &TestCommand::new_put(vec![i], i).set_as_dur(rand_dur), From a26b114325cf3f9be3535fcaca3e81e6236610b8 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:42:55 +0800 Subject: [PATCH 038/322] fix: execute early before after sync Prevents updating the state early before speculative execution Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/src/storage/kv_store.rs | 32 ++++++++++++++-------------- 1 file changed, 16 
insertions(+), 16 deletions(-) diff --git a/crates/xline/src/storage/kv_store.rs b/crates/xline/src/storage/kv_store.rs index 7b92043d9..e69a7d709 100644 --- a/crates/xline/src/storage/kv_store.rs +++ b/crates/xline/src/storage/kv_store.rs @@ -966,6 +966,17 @@ impl KvStore { { let (new_rev, prev_rev_opt) = index.register_revision(req.key.clone(), revision, *sub_revision); + let execute_resp = to_execute + .then(|| { + self.generate_put_resp( + req, + txn_db, + prev_rev_opt.map(|key_rev| key_rev.as_revision()), + ) + .map(|(resp, _)| resp.into()) + }) + .transpose()?; + let mut kv = KeyValue { key: req.key.clone(), value: req.value.clone(), @@ -1009,17 +1020,6 @@ impl KvStore { prev_kv: None, }]; - let execute_resp = to_execute - .then(|| { - self.generate_put_resp( - req, - txn_db, - prev_rev_opt.map(|key_rev| key_rev.as_revision()), - ) - .map(|(resp, _)| resp.into()) - }) - .transpose()?; - Ok((events, execute_resp)) } @@ -1036,6 +1036,11 @@ impl KvStore { where T: XlineStorageOps, { + let execute_resp = to_execute + .then(|| self.generate_delete_range_resp(req, txn_db, index)) + .transpose()? + .map(Into::into); + let keys = Self::delete_keys( txn_db, index, @@ -1047,11 +1052,6 @@ impl KvStore { Self::detach_leases(&keys, &self.lease_collection); - let execute_resp = to_execute - .then(|| self.generate_delete_range_resp(req, txn_db, index)) - .transpose()? 
- .map(Into::into); - Ok((Self::new_deletion_events(revision, keys), execute_resp)) } From b2caa6ea3e1035c54e92d05e049349d82e8a25d8 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 15 Aug 2024 19:22:10 +0800 Subject: [PATCH 039/322] refactor: disable fast path completely in etcd competible server Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/src/server/auth_server.rs | 45 +++++++++++-------------- crates/xline/src/server/kv_server.rs | 2 +- crates/xline/src/server/lease_server.rs | 12 +++---- crates/xline/src/server/lock_server.rs | 13 ++++--- crates/xline/src/server/maintenance.rs | 6 ++-- 5 files changed, 32 insertions(+), 46 deletions(-) diff --git a/crates/xline/src/server/auth_server.rs b/crates/xline/src/server/auth_server.rs index 33a0949ef..bd285d926 100644 --- a/crates/xline/src/server/auth_server.rs +++ b/crates/xline/src/server/auth_server.rs @@ -51,7 +51,6 @@ impl AuthServer { async fn propose( &self, request: tonic::Request, - use_fast_path: bool, ) -> Result<(CommandResponse, Option), tonic::Status> where T: Into, @@ -59,7 +58,7 @@ impl AuthServer { let auth_info = self.auth_store.try_get_auth_info_from_request(&request)?; let request = request.into_inner().into(); let cmd = Command::new_with_auth_info(request, auth_info); - let res = self.client.propose(&cmd, None, use_fast_path).await??; + let res = self.client.propose(&cmd, None, false).await??; Ok(res) } @@ -67,13 +66,12 @@ impl AuthServer { async fn handle_req( &self, request: tonic::Request, - use_fast_path: bool, ) -> Result, tonic::Status> where Req: Into, Res: From, { - let (cmd_res, sync_res) = self.propose(request, use_fast_path).await?; + let (cmd_res, sync_res) = self.propose(request).await?; let mut res_wrapper = cmd_res.into_inner(); if let Some(sync_res) = sync_res { res_wrapper.update_revision(sync_res.revision()); @@ -89,7 +87,7 @@ impl Auth for AuthServer { request: tonic::Request, ) -> Result, tonic::Status> 
{ debug!("Receive AuthEnableRequest {:?}", request); - self.handle_req(request, false).await + self.handle_req(request).await } async fn auth_disable( @@ -97,7 +95,7 @@ impl Auth for AuthServer { request: tonic::Request, ) -> Result, tonic::Status> { debug!("Receive AuthDisableRequest {:?}", request); - self.handle_req(request, false).await + self.handle_req(request).await } async fn auth_status( @@ -105,8 +103,7 @@ impl Auth for AuthServer { request: tonic::Request, ) -> Result, tonic::Status> { debug!("Receive AuthStatusRequest {:?}", request); - let is_fast_path = true; - self.handle_req(request, is_fast_path).await + self.handle_req(request).await } async fn authenticate( @@ -114,7 +111,7 @@ impl Auth for AuthServer { request: tonic::Request, ) -> Result, tonic::Status> { debug!("Receive AuthenticateRequest {:?}", request); - self.handle_req(request, false).await + self.handle_req(request).await } async fn user_add( @@ -128,7 +125,7 @@ impl Auth for AuthServer { .map_err(|err| tonic::Status::internal(format!("Failed to hash password: {err}")))?; user_add_req.hashed_password = hashed_password; user_add_req.password = String::new(); - self.handle_req(request, false).await + self.handle_req(request).await } async fn user_get( @@ -136,8 +133,7 @@ impl Auth for AuthServer { request: tonic::Request, ) -> Result, tonic::Status> { debug!("Receive AuthUserGetRequest {:?}", request); - let is_fast_path = true; - self.handle_req(request, is_fast_path).await + self.handle_req(request).await } async fn user_list( @@ -145,8 +141,7 @@ impl Auth for AuthServer { request: tonic::Request, ) -> Result, tonic::Status> { debug!("Receive AuthUserListRequest {:?}", request); - let is_fast_path = true; - self.handle_req(request, is_fast_path).await + self.handle_req(request).await } async fn user_delete( @@ -154,7 +149,7 @@ impl Auth for AuthServer { request: tonic::Request, ) -> Result, tonic::Status> { debug!("Receive AuthUserDeleteRequest {:?}", request); - self.handle_req(request, 
false).await + self.handle_req(request).await } async fn user_change_password( @@ -167,7 +162,7 @@ impl Auth for AuthServer { .map_err(|err| tonic::Status::internal(format!("Failed to hash password: {err}")))?; user_change_password_req.hashed_password = hashed_password; user_change_password_req.password = String::new(); - self.handle_req(request, false).await + self.handle_req(request).await } async fn user_grant_role( @@ -175,7 +170,7 @@ impl Auth for AuthServer { request: tonic::Request, ) -> Result, tonic::Status> { debug!("Receive AuthUserGrantRoleRequest {:?}", request); - self.handle_req(request, false).await + self.handle_req(request).await } async fn user_revoke_role( @@ -183,7 +178,7 @@ impl Auth for AuthServer { request: tonic::Request, ) -> Result, tonic::Status> { debug!("Receive AuthUserRevokeRoleRequest {:?}", request); - self.handle_req(request, false).await + self.handle_req(request).await } async fn role_add( @@ -192,7 +187,7 @@ impl Auth for AuthServer { ) -> Result, tonic::Status> { debug!("Receive AuthRoleAddRequest {:?}", request); request.get_ref().validation()?; - self.handle_req(request, false).await + self.handle_req(request).await } async fn role_get( @@ -200,8 +195,7 @@ impl Auth for AuthServer { request: tonic::Request, ) -> Result, tonic::Status> { debug!("Receive AuthRoleGetRequest {:?}", request); - let is_fast_path = true; - self.handle_req(request, is_fast_path).await + self.handle_req(request).await } async fn role_list( @@ -209,8 +203,7 @@ impl Auth for AuthServer { request: tonic::Request, ) -> Result, tonic::Status> { debug!("Receive AuthRoleListRequest {:?}", request); - let is_fast_path = true; - self.handle_req(request, is_fast_path).await + self.handle_req(request).await } async fn role_delete( @@ -218,7 +211,7 @@ impl Auth for AuthServer { request: tonic::Request, ) -> Result, tonic::Status> { debug!("Receive AuthRoleDeleteRequest {:?}", request); - self.handle_req(request, false).await + self.handle_req(request).await } 
async fn role_grant_permission( @@ -230,7 +223,7 @@ impl Auth for AuthServer { request.get_ref() ); request.get_ref().validation()?; - self.handle_req(request, false).await + self.handle_req(request).await } async fn role_revoke_permission( @@ -241,6 +234,6 @@ impl Auth for AuthServer { "Receive AuthRoleRevokePermissionRequest {}", request.get_ref() ); - self.handle_req(request, false).await + self.handle_req(request).await } } diff --git a/crates/xline/src/server/kv_server.rs b/crates/xline/src/server/kv_server.rs index 1bdf482c7..7e87064f3 100644 --- a/crates/xline/src/server/kv_server.rs +++ b/crates/xline/src/server/kv_server.rs @@ -258,7 +258,7 @@ impl Kv for KvServer { } else { Either::Right(async {}) }; - let (cmd_res, _sync_res) = self.client.propose(&cmd, None, !physical).await??; + let (cmd_res, _sync_res) = self.client.propose(&cmd, None, false).await??; let resp = cmd_res.into_inner(); if timeout(self.compact_timeout, compact_physical_fut) .await diff --git a/crates/xline/src/server/lease_server.rs b/crates/xline/src/server/lease_server.rs index 931abb015..d528c1c8d 100644 --- a/crates/xline/src/server/lease_server.rs +++ b/crates/xline/src/server/lease_server.rs @@ -119,7 +119,6 @@ impl LeaseServer { async fn propose( &self, request: tonic::Request, - use_fast_path: bool, ) -> Result<(CommandResponse, Option), tonic::Status> where T: Into, @@ -127,7 +126,7 @@ impl LeaseServer { let auth_info = self.auth_storage.try_get_auth_info_from_request(&request)?; let request = request.into_inner().into(); let cmd = Command::new_with_auth_info(request, auth_info); - let res = self.client.propose(&cmd, None, use_fast_path).await??; + let res = self.client.propose(&cmd, None, false).await??; Ok(res) } @@ -255,8 +254,7 @@ impl Lease for LeaseServer { lease_grant_req.id = self.id_gen.next(); } - let is_fast_path = true; - let (res, sync_res) = self.propose(request, is_fast_path).await?; + let (res, sync_res) = self.propose(request).await?; let mut res: 
LeaseGrantResponse = res.into_inner().into(); if let Some(sync_res) = sync_res { @@ -276,8 +274,7 @@ impl Lease for LeaseServer { ) -> Result, tonic::Status> { debug!("Receive LeaseRevokeRequest {:?}", request); - let is_fast_path = true; - let (res, sync_res) = self.propose(request, is_fast_path).await?; + let (res, sync_res) = self.propose(request).await?; let mut res: LeaseRevokeResponse = res.into_inner().into(); if let Some(sync_res) = sync_res { @@ -378,8 +375,7 @@ impl Lease for LeaseServer { ) -> Result, tonic::Status> { debug!("Receive LeaseLeasesRequest {:?}", request); - let is_fast_path = true; - let (res, sync_res) = self.propose(request, is_fast_path).await?; + let (res, sync_res) = self.propose(request).await?; let mut res: LeaseLeasesResponse = res.into_inner().into(); if let Some(sync_res) = sync_res { diff --git a/crates/xline/src/server/lock_server.rs b/crates/xline/src/server/lock_server.rs index f5649cb8c..dff302508 100644 --- a/crates/xline/src/server/lock_server.rs +++ b/crates/xline/src/server/lock_server.rs @@ -71,14 +71,13 @@ impl LockServer { &self, request: T, auth_info: Option, - use_fast_path: bool, ) -> Result<(CommandResponse, Option), tonic::Status> where T: Into, { let request = request.into(); let cmd = Command::new_with_auth_info(request, auth_info); - let res = self.client.propose(&cmd, None, use_fast_path).await??; + let res = self.client.propose(&cmd, None, false).await??; Ok(res) } @@ -148,7 +147,7 @@ impl LockServer { max_create_revision: rev, ..Default::default() }; - let (cmd_res, _sync_res) = self.propose(get_req, auth_info.cloned(), false).await?; + let (cmd_res, _sync_res) = self.propose(get_req, auth_info.cloned()).await?; let response = Into::::into(cmd_res.into_inner()); let last_key = match response.kvs.first() { Some(kv) => kv.key.clone(), @@ -186,7 +185,7 @@ impl LockServer { key: key.into(), ..Default::default() }; - let (cmd_res, _) = self.propose(del_req, auth_info, true).await?; + let (cmd_res, _) = 
self.propose(del_req, auth_info).await?; let res = Into::::into(cmd_res.into_inner()); Ok(res.header) } @@ -198,7 +197,7 @@ impl LockServer { ttl: DEFAULT_SESSION_TTL, id: lease_id, }; - let (cmd_res, _) = self.propose(lease_grant_req, auth_info, true).await?; + let (cmd_res, _) = self.propose(lease_grant_req, auth_info).await?; let res = Into::::into(cmd_res.into_inner()); Ok(res.id) } @@ -229,7 +228,7 @@ impl Lock for LockServer { let key = format!("{prefix}{lease_id:x}"); let txn = Self::create_acquire_txn(&prefix, lease_id); - let (cmd_res, sync_res) = self.propose(txn, auth_info.clone(), false).await?; + let (cmd_res, sync_res) = self.propose(txn, auth_info.clone()).await?; let mut txn_res = Into::::into(cmd_res.into_inner()); #[allow(clippy::unwrap_used)] // sync_res always has value when use slow path let my_rev = sync_res.unwrap().revision(); @@ -261,7 +260,7 @@ impl Lock for LockServer { key: key.as_bytes().to_vec(), ..Default::default() }; - let result = self.propose(range_req, auth_info.clone(), true).await; + let result = self.propose(range_req, auth_info.clone()).await; match result { Ok(res) => { let res = Into::::into(res.0.into_inner()); diff --git a/crates/xline/src/server/maintenance.rs b/crates/xline/src/server/maintenance.rs index e8bc522c1..f0ffc01d0 100644 --- a/crates/xline/src/server/maintenance.rs +++ b/crates/xline/src/server/maintenance.rs @@ -84,7 +84,6 @@ impl MaintenanceServer { async fn propose( &self, request: tonic::Request, - use_fast_path: bool, ) -> Result<(CommandResponse, Option), tonic::Status> where T: Into + Debug, @@ -92,7 +91,7 @@ impl MaintenanceServer { let auth_info = self.auth_store.try_get_auth_info_from_request(&request)?; let request = request.into_inner().into(); let cmd = Command::new_with_auth_info(request, auth_info); - let res = self.client.propose(&cmd, None, use_fast_path).await??; + let res = self.client.propose(&cmd, None, false).await??; Ok(res) } } @@ -103,8 +102,7 @@ impl Maintenance for 
MaintenanceServer { &self, request: tonic::Request, ) -> Result, tonic::Status> { - let is_fast_path = true; - let (res, sync_res) = self.propose(request, is_fast_path).await?; + let (res, sync_res) = self.propose(request).await?; let mut res: AlarmResponse = res.into_inner().into(); if let Some(sync_res) = sync_res { let revision = sync_res.revision(); From d2258711b294bb3e7057020f950c6d2a21eefba6 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 20 Aug 2024 09:21:50 +0800 Subject: [PATCH 040/322] fix: use after sync txn and index in lease revoke Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/src/server/command.rs | 10 ++- crates/xline/src/storage/lease_store/mod.rs | 90 +++++++++++++-------- 2 files changed, 64 insertions(+), 36 deletions(-) diff --git a/crates/xline/src/server/command.rs b/crates/xline/src/server/command.rs index 423e91739..183433b84 100644 --- a/crates/xline/src/server/command.rs +++ b/crates/xline/src/server/command.rs @@ -295,10 +295,11 @@ impl CommandExecutor { } /// After sync other type of commands - fn after_sync_others( + fn after_sync_others( &self, wrapper: &RequestWrapper, txn_db: &T, + index: &I, general_revision: &RevisionNumberGeneratorState<'_>, auth_revision: &RevisionNumberGeneratorState<'_>, to_execute: bool, @@ -311,6 +312,7 @@ impl CommandExecutor { > where T: XlineStorageOps + TransactionApi, + I: IndexOperate, { let er = to_execute .then(|| match wrapper.backend() { @@ -323,7 +325,10 @@ impl CommandExecutor { let (asr, wr_ops) = match wrapper.backend() { RequestBackend::Auth => self.auth_storage.after_sync(wrapper, auth_revision)?, - RequestBackend::Lease => self.lease_storage.after_sync(wrapper, general_revision)?, + RequestBackend::Lease => { + self.lease_storage + .after_sync(wrapper, general_revision, txn_db, index)? 
+ } RequestBackend::Alarm => self.alarm_storage.after_sync(wrapper, general_revision), RequestBackend::Kv => unreachable!("Should not sync kv commands"), }; @@ -473,6 +478,7 @@ impl CurpCommandExecutor for CommandExecutor { .after_sync_others( wrapper, &txn_db, + &index_state, &general_revision_state, &auth_revision_state, to_execute, diff --git a/crates/xline/src/storage/lease_store/mod.rs b/crates/xline/src/storage/lease_store/mod.rs index c396d669a..36adf1b48 100644 --- a/crates/xline/src/storage/lease_store/mod.rs +++ b/crates/xline/src/storage/lease_store/mod.rs @@ -29,7 +29,8 @@ use xlineapi::{ pub(crate) use self::{lease::Lease, lease_collection::LeaseCollection}; use super::{ db::{WriteOp, DB}, - index::Index, + index::{Index, IndexOperate}, + storage_api::XlineStorageOps, }; use crate::{ header_gen::HeaderGenerator, @@ -52,6 +53,7 @@ pub(crate) struct LeaseStore { lease_collection: Arc, /// Db to store lease db: Arc, + #[allow(unused)] // used in tests /// Key to revision index index: Arc, /// Header generator @@ -98,18 +100,25 @@ impl LeaseStore { } /// sync a lease request - pub(crate) fn after_sync( + pub(crate) fn after_sync( &self, request: &RequestWrapper, revision_gen: &RevisionNumberGeneratorState<'_>, - ) -> Result<(SyncResponse, Vec), ExecuteError> { + txn_db: &T, + index: &I, + ) -> Result<(SyncResponse, Vec), ExecuteError> + where + T: XlineStorageOps + TransactionApi, + I: IndexOperate, + { let revision = if request.skip_lease_revision() { revision_gen.get() } else { revision_gen.next() }; - self.sync_request(request, revision) - .map(|(rev, ops)| (SyncResponse::new(rev), ops)) + // TODO: return only a `SyncResponse` + self.sync_request(request, revision, txn_db, index) + .map(|rev| (SyncResponse::new(rev), vec![])) } /// Get lease by id @@ -273,36 +282,45 @@ impl LeaseStore { } /// Sync `RequestWithToken` - fn sync_request( + fn sync_request( &self, wrapper: &RequestWrapper, revision: i64, - ) -> Result<(i64, Vec), ExecuteError> { + txn_db: 
&T, + index: &I, + ) -> Result + where + T: XlineStorageOps + TransactionApi, + I: IndexOperate, + { #[allow(clippy::wildcard_enum_match_arm)] - let ops = match *wrapper { + match *wrapper { RequestWrapper::LeaseGrantRequest(ref req) => { debug!("Sync LeaseGrantRequest {:?}", req); - self.sync_lease_grant_request(req) + self.sync_lease_grant_request(req, txn_db)?; } RequestWrapper::LeaseRevokeRequest(ref req) => { debug!("Sync LeaseRevokeRequest {:?}", req); - self.sync_lease_revoke_request(req, revision)? + self.sync_lease_revoke_request(req, revision, txn_db, index)?; } RequestWrapper::LeaseLeasesRequest(ref req) => { debug!("Sync LeaseLeasesRequest {:?}", req); - vec![] } _ => unreachable!("Other request should not be sent to this store"), }; - Ok((revision, ops)) + Ok(revision) } /// Sync `LeaseGrantRequest` - fn sync_lease_grant_request(&self, req: &LeaseGrantRequest) -> Vec { + fn sync_lease_grant_request( + &self, + req: &LeaseGrantRequest, + txn_db: &T, + ) -> Result<(), ExecuteError> { let lease = self .lease_collection .grant(req.id, req.ttl, self.is_primary()); - vec![WriteOp::PutLease(lease)] + txn_db.write_op(WriteOp::PutLease(lease)) } /// Get all `PbLease` @@ -320,14 +338,19 @@ impl LeaseStore { } /// Sync `LeaseRevokeRequest` - fn sync_lease_revoke_request( + fn sync_lease_revoke_request( &self, req: &LeaseRevokeRequest, revision: i64, - ) -> Result, ExecuteError> { - let mut ops = Vec::new(); + txn_db: &T, + index: &I, + ) -> Result<(), ExecuteError> + where + T: XlineStorageOps + TransactionApi, + I: IndexOperate, + { let mut updates = Vec::new(); - ops.push(WriteOp::DeleteLease(req.id)); + txn_db.write_op(WriteOp::DeleteLease(req.id))?; let del_keys = match self.lease_collection.look_up(req.id) { Some(l) => l.keys(), @@ -336,31 +359,24 @@ impl LeaseStore { if del_keys.is_empty() { let _ignore = self.lease_collection.revoke(req.id); - return Ok(Vec::new()); + return Ok(()); } - let txn_db = self.db.transaction(); - let txn_index = 
self.index.state(); - for (key, mut sub_revision) in del_keys.iter().zip(0..) { let deleted = - KvStore::delete_keys(&txn_db, &txn_index, key, &[], revision, &mut sub_revision)?; + KvStore::delete_keys(txn_db, index, key, &[], revision, &mut sub_revision)?; KvStore::detach_leases(&deleted, &self.lease_collection); let mut del_event = KvStore::new_deletion_events(revision, deleted); updates.append(&mut del_event); } - txn_db - .commit() - .map_err(|e| ExecuteError::DbError(e.to_string()))?; - txn_index.commit(); - let _ignore = self.lease_collection.revoke(req.id); assert!( self.kv_update_tx.send((revision, updates)).is_ok(), "Failed to send updates to KV watcher" ); - Ok(ops) + + Ok(()) } } @@ -430,7 +446,9 @@ mod test { #[tokio::test(flavor = "multi_thread")] async fn test_lease_sync() -> Result<(), Box> { let db = DB::open(&EngineConfig::Memory)?; - let (lease_store, rev_gen) = init_store(db); + let txn = db.transaction(); + let index = Index::new(); + let (lease_store, rev_gen) = init_store(Arc::clone(&db)); let rev_gen_state = rev_gen.state(); let wait_duration = Duration::from_millis(1); @@ -444,7 +462,7 @@ mod test { "the future should block until the lease is synced" ); - let (_ignore, ops) = lease_store.after_sync(&req1, &rev_gen_state)?; + let (_ignore, ops) = lease_store.after_sync(&req1, &rev_gen_state, &txn, &index)?; lease_store.db.write_ops(ops)?; lease_store.mark_lease_synced(&req1); @@ -465,7 +483,7 @@ mod test { "the future should block until the lease is synced" ); - let (_ignore, ops) = lease_store.after_sync(&req2, &rev_gen_state)?; + let (_ignore, ops) = lease_store.after_sync(&req2, &rev_gen_state, &txn, &index)?; lease_store.db.write_ops(ops)?; lease_store.mark_lease_synced(&req2); @@ -522,8 +540,12 @@ mod test { rev_gen: &RevisionNumberGeneratorState<'_>, ) -> Result { let cmd_res = ls.execute(req)?; - let (_ignore, ops) = ls.after_sync(req, rev_gen)?; - ls.db.write_ops(ops)?; + let txn = ls.db.transaction(); + let index = ls.index.state(); 
+ let (_ignore, _ops) = ls.after_sync(req, rev_gen, &txn, &index)?; + txn.commit() + .map_err(|e| ExecuteError::DbError(e.to_string()))?; + index.commit(); rev_gen.commit(); Ok(cmd_res.into_inner()) } From 46ae7d342b1b6e9fdffbea7384ccbd85bd6945f0 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 20 Aug 2024 09:26:53 +0800 Subject: [PATCH 041/322] chore: remove unecessary txn usage Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/src/storage/kv_store.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/crates/xline/src/storage/kv_store.rs b/crates/xline/src/storage/kv_store.rs index e69a7d709..0832b7832 100644 --- a/crates/xline/src/storage/kv_store.rs +++ b/crates/xline/src/storage/kv_store.rs @@ -149,11 +149,9 @@ impl KvStoreInner { /// Get previous `KeyValue` of a `KeyValue` pub(crate) fn get_prev_kv(&self, kv: &KeyValue) -> Option { - let txn_db = self.db.transaction(); - let index = self.index.state(); Self::get_range( - &txn_db, - &index, + self.db.as_ref(), + self.index.as_ref(), &kv.key, &[], kv.mod_revision.overflow_sub(1), @@ -168,11 +166,10 @@ impl KvStoreInner { key_range: KeyRange, revision: i64, ) -> Result, ExecuteError> { - let txn = self.db.transaction(); let revisions = self.index .get_from_rev(key_range.range_start(), key_range.range_end(), revision); - let events = Self::get_values(&txn, &revisions)? + let events = Self::get_values(self.db.as_ref(), &revisions)? 
.into_iter() .map(|kv| { // Delete From 6b627c42df720e17c4d0d56201ed23703145c53b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 20 Aug 2024 17:16:42 +0800 Subject: [PATCH 042/322] fix: lease store revision generation Only increments revision number when there's key deleted Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/src/storage/lease_store/mod.rs | 28 ++++++++++++--------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/crates/xline/src/storage/lease_store/mod.rs b/crates/xline/src/storage/lease_store/mod.rs index 36adf1b48..7aab4a111 100644 --- a/crates/xline/src/storage/lease_store/mod.rs +++ b/crates/xline/src/storage/lease_store/mod.rs @@ -16,6 +16,7 @@ use std::{ time::Duration, }; +use clippy_utilities::OverflowArithmetic; use engine::TransactionApi; use log::debug; use parking_lot::RwLock; @@ -111,14 +112,15 @@ impl LeaseStore { T: XlineStorageOps + TransactionApi, I: IndexOperate, { - let revision = if request.skip_lease_revision() { - revision_gen.get() - } else { + let next_revision = revision_gen.get().overflow_add(1); + let updated = self.sync_request(request, next_revision, txn_db, index)?; + let rev = if updated { revision_gen.next() + } else { + revision_gen.get() }; // TODO: return only a `SyncResponse` - self.sync_request(request, revision, txn_db, index) - .map(|rev| (SyncResponse::new(rev), vec![])) + Ok((SyncResponse::new(rev), vec![])) } /// Get lease by id @@ -288,27 +290,29 @@ impl LeaseStore { revision: i64, txn_db: &T, index: &I, - ) -> Result + ) -> Result where T: XlineStorageOps + TransactionApi, I: IndexOperate, { #[allow(clippy::wildcard_enum_match_arm)] - match *wrapper { + let updated = match *wrapper { RequestWrapper::LeaseGrantRequest(ref req) => { debug!("Sync LeaseGrantRequest {:?}", req); self.sync_lease_grant_request(req, txn_db)?; + false } RequestWrapper::LeaseRevokeRequest(ref req) => { debug!("Sync LeaseRevokeRequest {:?}", req); - 
self.sync_lease_revoke_request(req, revision, txn_db, index)?; + self.sync_lease_revoke_request(req, revision, txn_db, index)? } RequestWrapper::LeaseLeasesRequest(ref req) => { debug!("Sync LeaseLeasesRequest {:?}", req); + false } _ => unreachable!("Other request should not be sent to this store"), }; - Ok(revision) + Ok(updated) } /// Sync `LeaseGrantRequest` @@ -344,7 +348,7 @@ impl LeaseStore { revision: i64, txn_db: &T, index: &I, - ) -> Result<(), ExecuteError> + ) -> Result where T: XlineStorageOps + TransactionApi, I: IndexOperate, @@ -359,7 +363,7 @@ impl LeaseStore { if del_keys.is_empty() { let _ignore = self.lease_collection.revoke(req.id); - return Ok(()); + return Ok(false); } for (key, mut sub_revision) in del_keys.iter().zip(0..) { @@ -376,7 +380,7 @@ impl LeaseStore { "Failed to send updates to KV watcher" ); - Ok(()) + Ok(true) } } From 7bf01d55ab7ead17a49f5ed32d3bbefa1663a681 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 23 Aug 2024 09:56:37 +0800 Subject: [PATCH 043/322] fix: use execute_ro to speculative execute read-only commands Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp-external-api/src/cmd.rs | 8 ++++++++ crates/curp-test-utils/src/test_cmd.rs | 10 ++++++++++ crates/curp/src/server/cmd_worker/mod.rs | 7 +------ crates/xline/src/server/command.rs | 20 +++++++++++++++++++- 4 files changed, 38 insertions(+), 7 deletions(-) diff --git a/crates/curp-external-api/src/cmd.rs b/crates/curp-external-api/src/cmd.rs index c29c221f8..5b282b8bd 100644 --- a/crates/curp-external-api/src/cmd.rs +++ b/crates/curp-external-api/src/cmd.rs @@ -104,6 +104,14 @@ where /// command. fn execute(&self, cmd: &C) -> Result; + /// Execute the read-only command + /// + /// # Errors + /// + /// This function may return an error if there is a problem executing the + /// command. 
+ fn execute_ro(&self, cmd: &C) -> Result<(C::ER, C::ASR), C::Error>; + /// Batch execute the after_sync callback /// /// This `highest_index` means the last log index of the `cmds` diff --git a/crates/curp-test-utils/src/test_cmd.rs b/crates/curp-test-utils/src/test_cmd.rs index 2a7cc980e..c3fa23895 100644 --- a/crates/curp-test-utils/src/test_cmd.rs +++ b/crates/curp-test-utils/src/test_cmd.rs @@ -284,6 +284,16 @@ impl CommandExecutor for TestCE { Ok(result) } + fn execute_ro( + &self, + cmd: &TestCommand, + ) -> Result< + (::ER, ::ASR), + ::Error, + > { + self.execute(cmd).map(|er| (er, LogIndexResult(0))) + } + fn after_sync( &self, cmds: Vec>, diff --git a/crates/curp/src/server/cmd_worker/mod.rs b/crates/curp/src/server/cmd_worker/mod.rs index 95a042597..d70cc20e7 100644 --- a/crates/curp/src/server/cmd_worker/mod.rs +++ b/crates/curp/src/server/cmd_worker/mod.rs @@ -51,12 +51,7 @@ pub(super) fn execute, RC: RoleChange>( unreachable!("should not speculative execute {:?}", entry.entry_data); }; if cmd.is_read_only() { - let result = ce - .after_sync(vec![AfterSyncCmd::new(cmd, true)], None) - .remove(0)?; - let (asr, er_opt) = result.into_parts(); - let er = er_opt.unwrap_or_else(|| unreachable!("er should exist")); - Ok((er, Some(asr))) + ce.execute_ro(cmd).map(|(er, asr)| (er, Some(asr))) } else { let er = ce.execute(cmd); let mut cb_w = cb.write(); diff --git a/crates/xline/src/server/command.rs b/crates/xline/src/server/command.rs index 183433b84..cd564729d 100644 --- a/crates/xline/src/server/command.rs +++ b/crates/xline/src/server/command.rs @@ -15,7 +15,7 @@ use parking_lot::RwLock; use tracing::warn; use utils::{barrier::IdBarrier, table_names::META_TABLE}; use xlineapi::{ - command::{Command, CurpClient}, + command::{Command, CurpClient, SyncResponse}, execute_error::ExecuteError, AlarmAction, AlarmRequest, AlarmType, }; @@ -429,6 +429,24 @@ impl CurpCommandExecutor for CommandExecutor { } } + fn execute_ro( + &self, + cmd: &Command, + ) -> Result< + 
(::ER, ::ASR), + ::Error, + > { + let er = self.execute(cmd)?; + let wrapper = cmd.request(); + let rev = match wrapper.backend() { + RequestBackend::Kv | RequestBackend::Lease | RequestBackend::Alarm => { + self.kv_storage.revision_gen().get() + } + RequestBackend::Auth => self.auth_storage.revision_gen().get(), + }; + Ok((er, SyncResponse::new(rev))) + } + fn after_sync( &self, cmds: Vec>, From c6d7d9b4258a3c44fd557bea45038ee19a64e77f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 16 Aug 2024 17:36:57 +0800 Subject: [PATCH 044/322] chore: use join_all to concurrently build clients in benchmark Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- Cargo.lock | 1 + crates/benchmark/Cargo.toml | 1 + crates/benchmark/src/runner.rs | 16 ++++++++++------ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7240258cd..f00733ad3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -332,6 +332,7 @@ dependencies = [ "clap", "clippy-utilities", "etcd-client", + "futures", "indicatif", "rand", "thiserror", diff --git a/crates/benchmark/Cargo.toml b/crates/benchmark/Cargo.toml index cc6a1c215..819ae65c1 100644 --- a/crates/benchmark/Cargo.toml +++ b/crates/benchmark/Cargo.toml @@ -17,6 +17,7 @@ anyhow = "1.0.83" clap = { version = "4", features = ["derive"] } clippy-utilities = "0.2.0" etcd-client = { version = "0.13.0", features = ["tls"] } +futures = "0.3.30" indicatif = "0.17.8" rand = "0.8.5" thiserror = "1.0.61" diff --git a/crates/benchmark/src/runner.rs b/crates/benchmark/src/runner.rs index f53063d59..fb167716f 100644 --- a/crates/benchmark/src/runner.rs +++ b/crates/benchmark/src/runner.rs @@ -9,6 +9,7 @@ use std::{ use anyhow::Result; use clippy_utilities::{NumericCast, OverflowArithmetic}; +use futures::future::join_all; use indicatif::ProgressBar; use rand::RngCore; use tokio::{ @@ -158,7 +159,6 @@ impl CommandRunner { /// Create clients async fn create_clients(&self) -> Result> 
{ - let mut clients = Vec::with_capacity(self.args.clients); let client_options = ClientOptions::default().with_client_config(ClientConfig::new( Duration::from_secs(10), Duration::from_secs(5), @@ -180,11 +180,15 @@ impl CommandRunner { } }) .collect::>(); - for _ in 0..self.args.clients { - let client = - BenchClient::new(addrs.clone(), self.args.use_curp, client_options.clone()).await?; - clients.push(client); - } + let clients_futs = std::iter::repeat_with(|| { + BenchClient::new(addrs.clone(), self.args.use_curp, client_options.clone()) + }) + .take(self.args.clients); + let clients = join_all(clients_futs) + .await + .into_iter() + .collect::>()?; + Ok(clients) } From c4f1dcb4c621b5bcf1e25ce1b2e7981bf9842096 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 23 Aug 2024 15:37:21 +0800 Subject: [PATCH 045/322] fix: remove check_members This check is not necessary, the urls are allowed to be empty before publish Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary.rs | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/crates/curp/src/client/unary.rs b/crates/curp/src/client/unary.rs index 7c6dc488f..2acf6658a 100644 --- a/crates/curp/src/client/unary.rs +++ b/crates/curp/src/client/unary.rs @@ -270,20 +270,6 @@ impl ClientApi for Unary { /// Send fetch cluster requests to all servers /// Note: The fetched cluster may still be outdated if `linearizable` is false async fn fetch_cluster(&self, linearizable: bool) -> Result { - /// Checks the member list, returns `true` if all member has been published - fn check_members(members: &[Member]) -> bool { - if members.is_empty() { - return false; - } - for member in members { - if member.client_urls.is_empty() { - debug!("new node {} not published yet", member.id()); - return false; - } - } - true - } - let timeout = self.config.wait_synced_timeout; if !linearizable { // firstly, try to fetch the local 
server @@ -344,14 +330,14 @@ impl ClientApi for Unary { match max_term.cmp(&inner.term) { Ordering::Less => { max_term = inner.term; - if check_members(&inner.members) { + if !inner.members.is_empty() { res = Some(inner); } // reset ok count to 1 ok_cnt = 1; } Ordering::Equal => { - if check_members(&inner.members) { + if !inner.members.is_empty() { res = Some(inner); } ok_cnt += 1; From 313b819caa305cc17125454224fab8953e3f6784 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 7 Aug 2024 16:43:15 +0800 Subject: [PATCH 046/322] fix: generate propose id inside client retry closure Because client id may change during retry, the propose id generation must be called for each retry Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- Cargo.lock | 2 ++ crates/curp/src/client/retry.rs | 39 +++++++++++++++++++-------------- workspace-hack/Cargo.toml | 2 ++ 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f00733ad3..068f7bfa9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3780,6 +3780,7 @@ dependencies = [ "crypto-common", "digest", "either", + "etcd-client", "futures-channel", "futures-util", "getrandom", @@ -3802,6 +3803,7 @@ dependencies = [ "tokio", "tokio-util", "tonic", + "tonic-build", "tower", "tracing", "tracing-log", diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 607623e4f..ee9e3d6c1 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -224,9 +224,9 @@ where token: Option<&String>, use_fast_path: bool, ) -> Result, tonic::Status> { - let propose_id = self.inner.gen_propose_id()?; - self.retry::<_, _>(|client| { - RepeatableClientApi::propose(client, *propose_id, cmd, token, use_fast_path) + self.retry::<_, _>(|client| async move { + let propose_id = self.inner.gen_propose_id()?; + RepeatableClientApi::propose(client, *propose_id, cmd, token, use_fast_path).await }) .await } @@ -236,19 +236,23 @@ 
where &self, changes: Vec, ) -> Result, tonic::Status> { - let propose_id = self.inner.gen_propose_id()?; self.retry::<_, _>(|client| { let changes_c = changes.clone(); - RepeatableClientApi::propose_conf_change(client, *propose_id, changes_c) + async move { + let propose_id = self.inner.gen_propose_id()?; + RepeatableClientApi::propose_conf_change(client, *propose_id, changes_c).await + } }) .await } /// Send propose to shutdown cluster async fn propose_shutdown(&self) -> Result<(), tonic::Status> { - let propose_id = self.inner.gen_propose_id()?; - self.retry::<_, _>(|client| RepeatableClientApi::propose_shutdown(client, *propose_id)) - .await + self.retry::<_, _>(|client| async move { + let propose_id = self.inner.gen_propose_id()?; + RepeatableClientApi::propose_shutdown(client, *propose_id).await + }) + .await } /// Send propose to publish a node id and name @@ -258,17 +262,20 @@ where node_name: String, node_client_urls: Vec, ) -> Result<(), Self::Error> { - let propose_id = self.inner.gen_propose_id()?; self.retry::<_, _>(|client| { let name_c = node_name.clone(); let node_client_urls_c = node_client_urls.clone(); - RepeatableClientApi::propose_publish( - client, - *propose_id, - node_id, - name_c, - node_client_urls_c, - ) + async move { + let propose_id = self.inner.gen_propose_id()?; + RepeatableClientApi::propose_publish( + client, + *propose_id, + node_id, + name_c, + node_client_urls_c, + ) + .await + } }) .await } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 7eec178ae..913b7cb78 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -19,6 +19,7 @@ clap = { version = "4", features = ["derive"] } crypto-common = { version = "0.1", default-features = false, features = ["std"] } digest = { version = "0.10", features = ["mac", "std"] } either = { version = "1", default-features = false, features = ["use_std"] } +etcd-client = { version = "0.13", default-features = false, features = ["tls"] } futures-channel = 
{ version = "0.3", features = ["sink"] } futures-util = { version = "0.3", features = ["channel", "io", "sink"] } getrandom = { version = "0.2", default-features = false, features = ["js", "rdrand", "std"] } @@ -57,5 +58,6 @@ predicates = { version = "3", default-features = false, features = ["diff"] } syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit", "visit-mut"] } tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "rt-multi-thread", "signal", "sync", "time"] } +tonic-build = { version = "0.11" } ### END HAKARI SECTION From 2ba7ae1e641d5a4d99bb3b81dcaad1a2df0be57e Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 23 Aug 2024 16:02:08 +0800 Subject: [PATCH 047/322] refactor: use synchronous compaction in `sync_compaction` It seems sync wait on a event listener will potentialy cause a deadlock, we will address this in the future. Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/src/storage/kv_store.rs | 46 ++++++++-------------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/crates/xline/src/storage/kv_store.rs b/crates/xline/src/storage/kv_store.rs index 0832b7832..19b8fb20a 100644 --- a/crates/xline/src/storage/kv_store.rs +++ b/crates/xline/src/storage/kv_store.rs @@ -11,8 +11,6 @@ use std::{ use clippy_utilities::{NumericCast, OverflowArithmetic}; use engine::{Transaction, TransactionApi}; -#[cfg(not(madsim))] -use event_listener::Listener; use prost::Message; use tracing::{debug, warn}; use utils::table_names::{KV_TABLE, META_TABLE}; @@ -1121,41 +1119,21 @@ impl KvStore { let ops = vec![WriteOp::PutScheduledCompactRevision(revision)]; // TODO: Remove the physical process logic here. 
It's better to move into the // KvServer - // TODO: sync compaction task // FIXME: madsim is single threaded, we cannot use synchronous wait here - #[cfg(not(madsim))] - { - let (event, listener) = if req.physical { - let event = Arc::new(event_listener::Event::new()); - let listener = event.listen(); - (Some(event), Some(listener)) - } else { - (None, None) - }; - if let Err(e) = self.compact_task_tx.send((revision, event)) { - panic!("the compactor exited unexpectedly: {e:?}"); - } - if let Some(listener) = listener { - listener.wait(); + let index = self.index(); + let target_revisions = index + .compact(revision) + .into_iter() + .map(|key_rev| key_rev.as_revision().encode_to_vec()) + .collect::>>(); + // Given that the Xline uses a lim-tree database with smaller write amplification as the storage backend , does using progressive compaction really good at improving performance? + for revision_chunk in target_revisions.chunks(1000) { + if let Err(e) = self.compact(revision_chunk) { + panic!("failed to compact revision chunk {revision_chunk:?} due to {e}"); } } - #[cfg(madsim)] - { - let index = self.index(); - let target_revisions = index - .compact(revision) - .into_iter() - .map(|key_rev| key_rev.as_revision().encode_to_vec()) - .collect::>>(); - // Given that the Xline uses a lim-tree database with smaller write amplification as the storage backend , does using progressive compaction really good at improving performance? 
- for revision_chunk in target_revisions.chunks(1000) { - if let Err(e) = self.compact(revision_chunk) { - panic!("failed to compact revision chunk {revision_chunk:?} due to {e}"); - } - } - if let Err(e) = self.compact_finished(revision) { - panic!("failed to set finished compact revision {revision:?} due to {e}"); - } + if let Err(e) = self.compact_finished(revision) { + panic!("failed to set finished compact revision {revision:?} due to {e}"); } self.inner.db.write_ops(ops)?; From 47a89003661ad865d44ca7500f66b807b2f32002 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 23 Aug 2024 16:20:05 +0800 Subject: [PATCH 048/322] chore: update cargo hakari Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- Cargo.lock | 2 -- workspace-hack/Cargo.toml | 2 -- 2 files changed, 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 068f7bfa9..f00733ad3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3780,7 +3780,6 @@ dependencies = [ "crypto-common", "digest", "either", - "etcd-client", "futures-channel", "futures-util", "getrandom", @@ -3803,7 +3802,6 @@ dependencies = [ "tokio", "tokio-util", "tonic", - "tonic-build", "tower", "tracing", "tracing-log", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 913b7cb78..7eec178ae 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -19,7 +19,6 @@ clap = { version = "4", features = ["derive"] } crypto-common = { version = "0.1", default-features = false, features = ["std"] } digest = { version = "0.10", features = ["mac", "std"] } either = { version = "1", default-features = false, features = ["use_std"] } -etcd-client = { version = "0.13", default-features = false, features = ["tls"] } futures-channel = { version = "0.3", features = ["sink"] } futures-util = { version = "0.3", features = ["channel", "io", "sink"] } getrandom = { version = "0.2", default-features = false, features = ["js", "rdrand", "std"] } @@ -58,6 +57,5 @@ 
predicates = { version = "3", default-features = false, features = ["diff"] } syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit", "visit-mut"] } tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "rt-multi-thread", "signal", "sync", "time"] } -tonic-build = { version = "0.11" } ### END HAKARI SECTION From 1d3c24076194b76f16d8cc1b718020236133b4a6 Mon Sep 17 00:00:00 2001 From: feathercyc Date: Thu, 22 Aug 2024 12:20:15 +0000 Subject: [PATCH 049/322] chore: transfer utils's interval map impl into dependency Signed-off-by: feathercyc --- Cargo.lock | 11 +- crates/utils/Cargo.toml | 3 +- crates/utils/benches/interval_map.rs | 118 -- crates/utils/src/interval_map/mod.rs | 1044 ----------------- crates/utils/src/interval_map/tests.rs | 322 ----- crates/utils/src/lib.rs | 4 +- crates/xline/src/conflict/spec_pool.rs | 2 +- crates/xline/src/conflict/uncommitted_pool.rs | 2 +- workspace-hack/Cargo.toml | 3 +- 9 files changed, 14 insertions(+), 1495 deletions(-) delete mode 100644 crates/utils/benches/interval_map.rs delete mode 100644 crates/utils/src/interval_map/mod.rs delete mode 100644 crates/utils/src/interval_map/tests.rs diff --git a/Cargo.lock b/Cargo.lock index cab788fcd..521f5a544 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2329,6 +2329,12 @@ dependencies = [ "rand_core", ] +[[package]] +name = "rb-interval-map" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2d14796e23a9778dec643e93352dc2404004793627102304f99cb164b47635c" + [[package]] name = "redox_syscall" version = "0.5.1" @@ -3303,8 +3309,7 @@ dependencies = [ "opentelemetry_sdk", "parking_lot", "pbkdf2", - "petgraph", - "rand", + "rb-interval-map", "regex", "serde", "test-macros", @@ -3665,8 +3670,8 @@ dependencies = [ "madsim-tonic", "memchr", "opentelemetry_sdk", - "petgraph", 
"predicates", + "rand", "serde", "serde_json", "sha2", diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index 57f6c7b5d..98837284e 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -25,12 +25,11 @@ derive_builder = "0.20.0" event-listener = "5.3.1" futures = "0.3.30" getset = "0.1" +interval_map = { package = "rb-interval-map" } opentelemetry = { version = "0.24.0", features = ["trace"] } opentelemetry_sdk = { version = "0.24.1", features = ["trace"] } parking_lot = { version = "0.12.3", optional = true } pbkdf2 = { version = "0.12.2", features = ["simple"] } -petgraph = "0.6.4" -rand = "0.8.5" regex = "1.10.5" serde = { version = "1.0.204", features = ["derive"] } thiserror = "1.0.61" diff --git a/crates/utils/benches/interval_map.rs b/crates/utils/benches/interval_map.rs deleted file mode 100644 index 46e93ec75..000000000 --- a/crates/utils/benches/interval_map.rs +++ /dev/null @@ -1,118 +0,0 @@ -#![cfg(bench)] -#![feature(test)] - -extern crate test; -extern crate utils; - -use std::hint::black_box; - -use test::Bencher; -use utils::interval_map::{Interval, IntervalMap}; - -struct Rng { - state: u32, -} - -impl Rng { - fn new() -> Self { - Self { state: 0x87654321 } - } - - fn gen_u32(&mut self) -> u32 { - self.state ^= self.state << 13; - self.state ^= self.state >> 17; - self.state ^= self.state << 5; - self.state - } - - fn gen_range_i32(&mut self, low: i32, high: i32) -> i32 { - let d = (high - low) as u32; - low + (self.gen_u32() % d) as i32 - } -} - -struct IntervalGenerator { - rng: Rng, - limit: i32, -} - -impl IntervalGenerator { - fn new() -> Self { - const LIMIT: i32 = 1000; - Self { - rng: Rng::new(), - limit: LIMIT, - } - } - - fn next(&mut self) -> Interval { - let low = self.rng.gen_range_i32(0, self.limit - 1); - let high = self.rng.gen_range_i32(low + 1, self.limit); - Interval::new(low, high) - } -} - -fn bench_interval_map_insert(count: usize, bench: &mut Bencher) { - let mut gen = IntervalGenerator::new(); - 
let intervals: Vec<_> = std::iter::repeat_with(|| gen.next()).take(count).collect(); - bench.iter(|| { - let mut map = IntervalMap::new(); - for i in intervals.clone() { - black_box(map.insert(i, ())); - } - }); -} - -fn bench_interval_map_insert_remove(count: usize, bench: &mut Bencher) { - let mut gen = IntervalGenerator::new(); - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next()).take(count).collect(); - bench.iter(|| { - let mut map = IntervalMap::new(); - for i in intervals.clone() { - black_box(map.insert(i, ())); - } - for i in &intervals { - black_box(map.remove(&i)); - } - }); -} - -#[bench] -fn bench_interval_map_insert_100(bench: &mut Bencher) { - bench_interval_map_insert(100, bench); -} - -#[bench] -fn bench_interval_map_insert_1000(bench: &mut Bencher) { - bench_interval_map_insert(1000, bench); -} - -#[bench] -fn bench_interval_map_insert_10000(bench: &mut Bencher) { - bench_interval_map_insert(10_000, bench); -} - -#[bench] -fn bench_interval_map_insert_100000(bench: &mut Bencher) { - bench_interval_map_insert(100_000, bench); -} - -#[bench] -fn bench_interval_map_insert_remove_100(bench: &mut Bencher) { - bench_interval_map_insert_remove(100, bench); -} - -#[bench] -fn bench_interval_map_insert_remove_1000(bench: &mut Bencher) { - bench_interval_map_insert_remove(1000, bench); -} - -#[bench] -fn bench_interval_map_insert_remove_10000(bench: &mut Bencher) { - bench_interval_map_insert_remove(10_000, bench); -} - -#[bench] -fn bench_interval_map_insert_remove_100000(bench: &mut Bencher) { - bench_interval_map_insert_remove(100_000, bench); -} diff --git a/crates/utils/src/interval_map/mod.rs b/crates/utils/src/interval_map/mod.rs deleted file mode 100644 index d03297c3e..000000000 --- a/crates/utils/src/interval_map/mod.rs +++ /dev/null @@ -1,1044 +0,0 @@ -use std::collections::VecDeque; - -use petgraph::graph::{DefaultIx, IndexType, NodeIndex}; - -#[cfg(test)] -mod tests; - -/// An interval-value map, which support operations on dynamic 
sets of intervals. -#[derive(Debug)] -pub struct IntervalMap { - /// Vector that stores nodes - nodes: Vec>, - /// Root of the interval tree - root: NodeIndex, - /// Number of elements in the map - len: usize, -} - -impl IntervalMap -where - T: Ord, - Ix: IndexType, -{ - /// Creates a new `IntervalMap` with estimated capacity. - #[inline] - #[must_use] - pub fn with_capacity(capacity: usize) -> Self { - let mut nodes = vec![Self::new_sentinel()]; - nodes.reserve(capacity); - IntervalMap { - nodes, - root: Self::sentinel(), - len: 0, - } - } - - /// Inserts a interval-value pair into the map. - /// - /// # Panics - /// - /// This method panics when the tree is at the maximum number of nodes for its index - #[inline] - pub fn insert(&mut self, interval: Interval, value: V) -> Option { - let node_idx = NodeIndex::new(self.nodes.len()); - let node = Self::new_node(interval, value, node_idx); - // check for max capacity, except if we use usize - assert!( - ::max().index() == !0 || NodeIndex::end() != node_idx, - "Reached maximum number of nodes" - ); - self.nodes.push(node); - self.insert_inner(node_idx) - } - - /// Removes a interval from the map, returning the value at the interval if the interval - /// was previously in the map. - #[inline] - pub fn remove(&mut self, interval: &Interval) -> Option { - if let Some(node_idx) = self.search_exact(interval) { - self.remove_inner(node_idx); - // Swap the node with the last node stored in the vector and update indices - let mut node = self.nodes.swap_remove(node_idx.index()); - let old = NodeIndex::::new(self.nodes.len()); - self.update_idx(old, node_idx); - - return node.value.take(); - } - None - } - - /// Checks if an interval in the map overlaps with the given interval. - #[inline] - pub fn overlap(&self, interval: &Interval) -> bool { - let node_idx = self.search(interval); - !self.node_ref(node_idx, Node::is_sentinel) - } - - /// Finds all intervals in the map that overlaps with the given interval. 
- #[inline] - pub fn find_all_overlap(&self, interval: &Interval) -> Vec<(&Interval, &V)> { - if self.node_ref(self.root, Node::is_sentinel) { - Vec::new() - } else { - self.find_all_overlap_inner_unordered(self.root, interval) - } - } - - /// Returns a reference to the value corresponding to the key. - #[inline] - pub fn get(&self, interval: &Interval) -> Option<&V> { - self.search_exact(interval) - .map(|idx| self.node_ref(idx, Node::value)) - } - - /// Returns a reference to the value corresponding to the key. - #[inline] - pub fn get_mut(&mut self, interval: &Interval) -> Option<&mut V> { - self.search_exact(interval) - .map(|idx| self.node_mut(idx, Node::value_mut)) - } - - /// Gets an iterator over the entries of the map, sorted by key. - #[inline] - #[must_use] - pub fn iter(&self) -> Iter<'_, T, V, Ix> { - Iter { - map_ref: self, - stack: None, - } - } - - /// Gets the given key's corresponding entry in the map for in-place manipulation. - #[inline] - pub fn entry(&mut self, interval: Interval) -> Entry<'_, T, V, Ix> { - match self.search_exact(&interval) { - Some(node) => Entry::Occupied(OccupiedEntry { - map_ref: self, - node, - }), - None => Entry::Vacant(VacantEntry { - map_ref: self, - interval, - }), - } - } - - /// Removes all elements from the map - #[inline] - pub fn clear(&mut self) { - self.nodes.clear(); - self.nodes.push(Self::new_sentinel()); - self.root = Self::sentinel(); - self.len = 0; - } - - /// Returns the number of elements in the map. - #[inline] - #[must_use] - pub fn len(&self) -> usize { - self.len - } - - /// Returns `true` if the map contains no elements. 
- #[inline] - #[must_use] - pub fn is_empty(&self) -> bool { - self.len() == 0 - } -} - -impl IntervalMap -where - T: Ord, -{ - /// Creates an empty `IntervalMap` - #[must_use] - #[inline] - pub fn new() -> Self { - Self { - nodes: vec![Self::new_sentinel()], - root: Self::sentinel(), - len: 0, - } - } -} - -impl Default for IntervalMap -where - T: Ord, -{ - #[inline] - fn default() -> Self { - Self::with_capacity(0) - } -} - -impl IntervalMap -where - T: Ord, - Ix: IndexType, -{ - /// Creates a new sentinel node - fn new_sentinel() -> Node { - Node { - interval: None, - value: None, - max_index: None, - left: None, - right: None, - parent: None, - color: Color::Black, - } - } - - /// Creates a new tree node - fn new_node(interval: Interval, value: V, index: NodeIndex) -> Node { - Node { - max_index: Some(index), - interval: Some(interval), - value: Some(value), - left: Some(Self::sentinel()), - right: Some(Self::sentinel()), - parent: Some(Self::sentinel()), - color: Color::Red, - } - } - - /// Gets the sentinel node index - fn sentinel() -> NodeIndex { - NodeIndex::new(0) - } -} - -impl IntervalMap -where - T: Ord, - Ix: IndexType, -{ - /// Inserts a node into the tree. 
- fn insert_inner(&mut self, z: NodeIndex) -> Option { - let mut y = Self::sentinel(); - let mut x = self.root; - - while !self.node_ref(x, Node::is_sentinel) { - y = x; - if self.node_ref(z, Node::interval) == self.node_ref(y, Node::interval) { - let zval = self.node_mut(z, Node::take_value); - let old_value = self.node_mut(y, Node::set_value(zval)); - return Some(old_value); - } - if self.node_ref(z, Node::interval) < self.node_ref(x, Node::interval) { - x = self.node_ref(x, Node::left); - } else { - x = self.node_ref(x, Node::right); - } - } - self.node_mut(z, Node::set_parent(y)); - if self.node_ref(y, Node::is_sentinel) { - self.root = z; - } else { - if self.node_ref(z, Node::interval) < self.node_ref(y, Node::interval) { - self.node_mut(y, Node::set_left(z)); - } else { - self.node_mut(y, Node::set_right(z)); - } - self.update_max_bottom_up(y); - } - self.node_mut(z, Node::set_color(Color::Red)); - - self.insert_fixup(z); - - self.len = self.len.wrapping_add(1); - None - } - - /// Removes a node from the tree. 
- fn remove_inner(&mut self, z: NodeIndex) { - let mut y = z; - let mut y_orig_color = self.node_ref(y, Node::color); - let x; - if self.left_ref(z, Node::is_sentinel) { - x = self.node_ref(z, Node::right); - self.transplant(z, x); - self.update_max_bottom_up(self.node_ref(z, Node::parent)); - } else if self.right_ref(z, Node::is_sentinel) { - x = self.node_ref(z, Node::left); - self.transplant(z, x); - self.update_max_bottom_up(self.node_ref(z, Node::parent)); - } else { - y = self.tree_minimum(self.node_ref(z, Node::right)); - let mut p = y; - y_orig_color = self.node_ref(y, Node::color); - x = self.node_ref(y, Node::right); - if self.node_ref(y, Node::parent) == z { - self.node_mut(x, Node::set_parent(y)); - } else { - self.transplant(y, x); - p = self.node_ref(y, Node::parent); - self.node_mut(y, Node::set_right(self.node_ref(z, Node::right))); - self.right_mut(y, Node::set_parent(y)); - } - self.transplant(z, y); - self.node_mut(y, Node::set_left(self.node_ref(z, Node::left))); - self.left_mut(y, Node::set_parent(y)); - self.node_mut(y, Node::set_color(self.node_ref(z, Node::color))); - - self.update_max_bottom_up(p); - } - - if matches!(y_orig_color, Color::Black) { - self.remove_fixup(x); - } - - self.len = self.len.wrapping_sub(1); - } - - /// Finds all intervals in the map that overlaps with the given interval. 
- #[cfg(interval_tree_find_overlap_ordered)] - fn find_all_overlap_inner( - &self, - x: NodeIndex, - interval: &Interval, - ) -> Vec<(&Interval, &V)> { - let mut list = vec![]; - if self.node_ref(x, Node::interval).overlap(interval) { - list.push(self.node_ref(x, |nx| (nx.interval(), nx.value()))); - } - if self.max(self.node_ref(x, Node::left)) >= Some(&interval.low) { - list.extend(self.find_all_overlap_inner(self.node_ref(x, Node::left), interval)); - } - if self - .max(self.node_ref(x, Node::right)) - .map(|rmax| IntervalRef::new(&self.node_ref(x, Node::interval).low, rmax)) - .is_some_and(|i| i.overlap(interval)) - { - list.extend(self.find_all_overlap_inner(self.node_ref(x, Node::right), interval)); - } - list - } - - /// Finds all intervals in the map that overlaps with the given interval. - /// - /// The result is unordered because of breadth-first search to save stack size - fn find_all_overlap_inner_unordered( - &self, - x: NodeIndex, - interval: &Interval, - ) -> Vec<(&Interval, &V)> { - let mut list = Vec::new(); - let mut queue = VecDeque::new(); - queue.push_back(x); - while let Some(p) = queue.pop_front() { - if self.node_ref(p, Node::interval).overlap(interval) { - list.push(self.node_ref(p, |np| (np.interval(), np.value()))); - } - let p_left = self.node_ref(p, Node::left); - let p_right = self.node_ref(p, Node::right); - if self.max(p_left) >= Some(&interval.low) { - queue.push_back(p_left); - } - if self - .max(self.node_ref(p, Node::right)) - .map(|rmax| IntervalRef::new(&self.node_ref(p, Node::interval).low, rmax)) - .is_some_and(|i| i.overlap(interval)) - { - queue.push_back(p_right); - } - } - - list - } - - /// Search for an interval that overlaps with the given interval. 
- fn search(&self, interval: &Interval) -> NodeIndex { - let mut x = self.root; - while self - .node_ref(x, Node::sentinel) - .map(Node::interval) - .is_some_and(|xi| !xi.overlap(interval)) - { - if self.max(self.node_ref(x, Node::left)) > Some(&interval.low) { - x = self.node_ref(x, Node::left); - } else { - x = self.node_ref(x, Node::right); - } - } - x - } - - /// Search for the node with exact the given interval - fn search_exact(&self, interval: &Interval) -> Option> { - let mut x = self.root; - while !self.node_ref(x, Node::is_sentinel) { - if self.node_ref(x, Node::interval) == interval { - return Some(x); - } - if self.max(x) < Some(&interval.high) { - return None; - } - if self.node_ref(x, Node::interval) > interval { - x = self.node_ref(x, Node::left); - } else { - x = self.node_ref(x, Node::right); - } - } - None - } - - /// Restores red-black tree properties after an insert. - fn insert_fixup(&mut self, mut z: NodeIndex) { - while self.parent_ref(z, Node::is_red) { - if self.grand_parent_ref(z, Node::is_sentinel) { - break; - } - if self.is_left_child(self.node_ref(z, Node::parent)) { - let y = self.grand_parent_ref(z, Node::right); - if self.node_ref(y, Node::is_red) { - self.parent_mut(z, Node::set_color(Color::Black)); - self.node_mut(y, Node::set_color(Color::Black)); - self.grand_parent_mut(z, Node::set_color(Color::Red)); - z = self.parent_ref(z, Node::parent); - } else { - if self.is_right_child(z) { - z = self.node_ref(z, Node::parent); - self.left_rotate(z); - } - self.parent_mut(z, Node::set_color(Color::Black)); - self.grand_parent_mut(z, Node::set_color(Color::Red)); - self.right_rotate(self.parent_ref(z, Node::parent)); - } - } else { - let y = self.grand_parent_ref(z, Node::left); - if self.node_ref(y, Node::is_red) { - self.parent_mut(z, Node::set_color(Color::Black)); - self.node_mut(y, Node::set_color(Color::Black)); - self.grand_parent_mut(z, Node::set_color(Color::Red)); - z = self.parent_ref(z, Node::parent); - } else { - if 
self.is_left_child(z) { - z = self.node_ref(z, Node::parent); - self.right_rotate(z); - } - self.parent_mut(z, Node::set_color(Color::Black)); - self.grand_parent_mut(z, Node::set_color(Color::Red)); - self.left_rotate(self.parent_ref(z, Node::parent)); - } - } - } - self.node_mut(self.root, Node::set_color(Color::Black)); - } - - /// Restores red-black tree properties after a remove. - fn remove_fixup(&mut self, mut x: NodeIndex) { - while x != self.root && self.node_ref(x, Node::is_black) { - let mut w; - if self.is_left_child(x) { - w = self.parent_ref(x, Node::right); - if self.node_ref(w, Node::is_red) { - self.node_mut(w, Node::set_color(Color::Black)); - self.parent_mut(x, Node::set_color(Color::Red)); - self.left_rotate(self.node_ref(x, Node::parent)); - w = self.parent_ref(x, Node::right); - } - if self.node_ref(w, Node::is_sentinel) { - break; - } - if self.left_ref(w, Node::is_black) && self.right_ref(w, Node::is_black) { - self.node_mut(w, Node::set_color(Color::Red)); - x = self.node_ref(x, Node::parent); - } else { - if self.right_ref(w, Node::is_black) { - self.left_mut(w, Node::set_color(Color::Black)); - self.node_mut(w, Node::set_color(Color::Red)); - self.right_rotate(w); - w = self.parent_ref(x, Node::right); - } - self.node_mut(w, Node::set_color(self.parent_ref(x, Node::color))); - self.parent_mut(x, Node::set_color(Color::Black)); - self.right_mut(w, Node::set_color(Color::Black)); - self.left_rotate(self.node_ref(x, Node::parent)); - x = self.root; - } - } else { - w = self.parent_ref(x, Node::left); - if self.node_ref(w, Node::is_red) { - self.node_mut(w, Node::set_color(Color::Black)); - self.parent_mut(x, Node::set_color(Color::Red)); - self.right_rotate(self.node_ref(x, Node::parent)); - w = self.parent_ref(x, Node::left); - } - if self.node_ref(w, Node::is_sentinel) { - break; - } - if self.right_ref(w, Node::is_black) && self.left_ref(w, Node::is_black) { - self.node_mut(w, Node::set_color(Color::Red)); - x = self.node_ref(x, 
Node::parent); - } else { - if self.left_ref(w, Node::is_black) { - self.right_mut(w, Node::set_color(Color::Black)); - self.node_mut(w, Node::set_color(Color::Red)); - self.left_rotate(w); - w = self.parent_ref(x, Node::left); - } - self.node_mut(w, Node::set_color(self.parent_ref(x, Node::color))); - self.parent_mut(x, Node::set_color(Color::Black)); - self.left_mut(w, Node::set_color(Color::Black)); - self.right_rotate(self.node_ref(x, Node::parent)); - x = self.root; - } - } - } - self.node_mut(x, Node::set_color(Color::Black)); - } - - /// Binary tree left rotate. - fn left_rotate(&mut self, x: NodeIndex) { - if self.right_ref(x, Node::is_sentinel) { - return; - } - let y = self.node_ref(x, Node::right); - self.node_mut(x, Node::set_right(self.node_ref(y, Node::left))); - if !self.left_ref(y, Node::is_sentinel) { - self.left_mut(y, Node::set_parent(x)); - } - - self.replace_parent(x, y); - self.node_mut(y, Node::set_left(x)); - - self.rotate_update_max(x, y); - } - - /// Binary tree right rotate. - fn right_rotate(&mut self, x: NodeIndex) { - if self.left_ref(x, Node::is_sentinel) { - return; - } - let y = self.node_ref(x, Node::left); - self.node_mut(x, Node::set_left(self.node_ref(y, Node::right))); - if !self.right_ref(y, Node::is_sentinel) { - self.right_mut(y, Node::set_parent(x)); - } - - self.replace_parent(x, y); - self.node_mut(y, Node::set_right(x)); - - self.rotate_update_max(x, y); - } - - /// Replaces parent during a rotation. - fn replace_parent(&mut self, x: NodeIndex, y: NodeIndex) { - self.node_mut(y, Node::set_parent(self.node_ref(x, Node::parent))); - if self.parent_ref(x, Node::is_sentinel) { - self.root = y; - } else if self.is_left_child(x) { - self.parent_mut(x, Node::set_left(y)); - } else { - self.parent_mut(x, Node::set_right(y)); - } - self.node_mut(x, Node::set_parent(y)); - } - - /// Updates the max value after a rotation. 
- fn rotate_update_max(&mut self, x: NodeIndex, y: NodeIndex) { - self.node_mut(y, Node::set_max_index(self.node_ref(x, Node::max_index))); - self.recaculate_max(x); - } - - /// Updates the max value towards the root - fn update_max_bottom_up(&mut self, x: NodeIndex) { - let mut p = x; - while !self.node_ref(p, Node::is_sentinel) { - self.recaculate_max(p); - p = self.node_ref(p, Node::parent); - } - } - - /// Recaculate max value from left and right childrens - fn recaculate_max(&mut self, x: NodeIndex) { - self.node_mut(x, Node::set_max_index(x)); - let x_left = self.node_ref(x, Node::left); - let x_right = self.node_ref(x, Node::right); - if self.max(x_left) > self.max(x) { - self.node_mut( - x, - Node::set_max_index(self.node_ref(x_left, Node::max_index)), - ); - } - if self.max(x_right) > self.max(x) { - self.node_mut( - x, - Node::set_max_index(self.node_ref(x_right, Node::max_index)), - ); - } - } - - /// Finds the node with the minimum interval. - fn tree_minimum(&self, mut x: NodeIndex) -> NodeIndex { - while !self.left_ref(x, Node::is_sentinel) { - x = self.node_ref(x, Node::left); - } - x - } - - /// Replaces one subtree as a child of its parent with another subtree. - fn transplant(&mut self, u: NodeIndex, v: NodeIndex) { - if self.parent_ref(u, Node::is_sentinel) { - self.root = v; - } else if self.is_left_child(u) { - self.parent_mut(u, Node::set_left(v)); - } else { - self.parent_mut(u, Node::set_right(v)); - } - self.node_mut(v, Node::set_parent(self.node_ref(u, Node::parent))); - } - - /// Checks if a node is a left child of its parent. - fn is_left_child(&self, node: NodeIndex) -> bool { - self.parent_ref(node, Node::left) == node - } - - /// Checks if a node is a right child of its parent. 
- fn is_right_child(&self, node: NodeIndex) -> bool { - self.parent_ref(node, Node::right) == node - } - - /// Updates nodes indices after remove - /// - /// This method has a time complexity of `O(logn)`, as we need to - /// update the max index from bottom to top. - fn update_idx(&mut self, old: NodeIndex, new: NodeIndex) { - if self.root == old { - self.root = new; - } - if self.nodes.get(new.index()).is_some() { - if !self.parent_ref(new, Node::is_sentinel) { - if self.parent_ref(new, Node::left) == old { - self.parent_mut(new, Node::set_left(new)); - } else { - self.parent_mut(new, Node::set_right(new)); - } - } - self.left_mut(new, Node::set_parent(new)); - self.right_mut(new, Node::set_parent(new)); - - let mut p = new; - while !self.node_ref(p, Node::is_sentinel) { - if self.node_ref(p, Node::max_index) == old { - self.node_mut(p, Node::set_max_index(new)); - } - p = self.node_ref(p, Node::parent); - } - } - } -} - -// Convenient methods for reference or mutate current/parent/left/right node -#[allow(clippy::missing_docs_in_private_items)] // Trivial convenient methods -#[allow(clippy::indexing_slicing)] // Won't panic since all the indices we used are inbound -impl<'a, T, V, Ix> IntervalMap -where - Ix: IndexType, -{ - fn node_ref(&'a self, node: NodeIndex, op: F) -> R - where - R: 'a, - F: FnOnce(&'a Node) -> R, - { - op(&self.nodes[node.index()]) - } - - fn node_mut(&'a mut self, node: NodeIndex, op: F) -> R - where - R: 'a, - F: FnOnce(&'a mut Node) -> R, - { - op(&mut self.nodes[node.index()]) - } - - fn left_ref(&'a self, node: NodeIndex, op: F) -> R - where - R: 'a, - F: FnOnce(&'a Node) -> R, - { - let idx = self.nodes[node.index()].left().index(); - op(&self.nodes[idx]) - } - - fn right_ref(&'a self, node: NodeIndex, op: F) -> R - where - R: 'a, - F: FnOnce(&'a Node) -> R, - { - let idx = self.nodes[node.index()].right().index(); - op(&self.nodes[idx]) - } - - fn parent_ref(&'a self, node: NodeIndex, op: F) -> R - where - R: 'a, - F: FnOnce(&'a 
Node) -> R, - { - let idx = self.nodes[node.index()].parent().index(); - op(&self.nodes[idx]) - } - - fn grand_parent_ref(&'a self, node: NodeIndex, op: F) -> R - where - R: 'a, - F: FnOnce(&'a Node) -> R, - { - let parent_idx = self.nodes[node.index()].parent().index(); - let grand_parent_idx = self.nodes[parent_idx].parent().index(); - op(&self.nodes[grand_parent_idx]) - } - - fn left_mut(&'a mut self, node: NodeIndex, op: F) -> R - where - R: 'a, - F: FnOnce(&'a mut Node) -> R, - { - let idx = self.nodes[node.index()].left().index(); - op(&mut self.nodes[idx]) - } - - fn right_mut(&'a mut self, node: NodeIndex, op: F) -> R - where - R: 'a, - F: FnOnce(&'a mut Node) -> R, - { - let idx = self.nodes[node.index()].right().index(); - op(&mut self.nodes[idx]) - } - - fn parent_mut(&'a mut self, node: NodeIndex, op: F) -> R - where - R: 'a, - F: FnOnce(&'a mut Node) -> R, - { - let idx = self.nodes[node.index()].parent().index(); - op(&mut self.nodes[idx]) - } - - fn grand_parent_mut(&'a mut self, node: NodeIndex, op: F) -> R - where - R: 'a, - F: FnOnce(&'a mut Node) -> R, - { - let parent_idx = self.nodes[node.index()].parent().index(); - let grand_parent_idx = self.nodes[parent_idx].parent().index(); - op(&mut self.nodes[grand_parent_idx]) - } - - fn max(&self, node: NodeIndex) -> Option<&T> { - let max_index = self.nodes[node.index()].max_index?.index(); - self.nodes[max_index].interval.as_ref().map(|i| &i.high) - } -} - -/// An iterator over the entries of a `IntervalMap`. -#[derive(Debug)] -pub struct Iter<'a, T, V, Ix> { - /// Reference to the map - map_ref: &'a IntervalMap, - /// Stack for iteration - stack: Option>>, -} - -impl Iter<'_, T, V, Ix> -where - Ix: IndexType, -{ - /// Initializes the stack - fn init_stack(&mut self) { - self.stack = Some(Self::left_link(self.map_ref, self.map_ref.root)); - } - - /// Pushes a link of nodes on the left to stack. 
- fn left_link(map_ref: &IntervalMap, mut x: NodeIndex) -> Vec> { - let mut nodes = vec![]; - while !map_ref.node_ref(x, Node::is_sentinel) { - nodes.push(x); - x = map_ref.node_ref(x, Node::left); - } - nodes - } -} - -impl<'a, T, V, Ix> Iterator for Iter<'a, T, V, Ix> -where - Ix: IndexType, -{ - type Item = (&'a Interval, &'a V); - - #[allow(clippy::unwrap_used, clippy::unwrap_in_result)] - #[inline] - fn next(&mut self) -> Option { - if self.stack.is_none() { - self.init_stack(); - } - let stack = self.stack.as_mut().unwrap(); - if stack.is_empty() { - return None; - } - let x = stack.pop().unwrap(); - stack.extend(Self::left_link( - self.map_ref, - self.map_ref.node_ref(x, Node::right), - )); - Some(self.map_ref.node_ref(x, |xn| (xn.interval(), xn.value()))) - } -} - -/// A view into a single entry in a map, which may either be vacant or occupied. -#[allow(clippy::exhaustive_enums)] // It is final -#[derive(Debug)] -pub enum Entry<'a, T, V, Ix> { - /// An occupied entry. - Occupied(OccupiedEntry<'a, T, V, Ix>), - /// A vacant entry. - Vacant(VacantEntry<'a, T, V, Ix>), -} - -/// A view into an occupied entry in a `IntervalMap`. -/// It is part of the [`Entry`] enum. -#[derive(Debug)] -pub struct OccupiedEntry<'a, T, V, Ix> { - /// Reference to the map - map_ref: &'a mut IntervalMap, - /// The entry node - node: NodeIndex, -} - -/// A view into a vacant entry in a `IntervalMap`. -/// It is part of the [`Entry`] enum. -#[derive(Debug)] -pub struct VacantEntry<'a, T, V, Ix> { - /// Mutable reference to the map - map_ref: &'a mut IntervalMap, - /// The interval of this entry - interval: Interval, -} - -impl<'a, T, V, Ix> Entry<'a, T, V, Ix> -where - T: Ord, - Ix: IndexType, -{ - /// Ensures a value is in the entry by inserting the default if empty, and returns - /// a mutable reference to the value in the entry. 
- #[inline] - pub fn or_insert(self, default: V) -> &'a mut V { - match self { - Entry::Occupied(entry) => entry.map_ref.node_mut(entry.node, Node::value_mut), - Entry::Vacant(entry) => { - let entry_idx = NodeIndex::new(entry.map_ref.nodes.len()); - let _ignore = entry.map_ref.insert(entry.interval, default); - entry.map_ref.node_mut(entry_idx, Node::value_mut) - } - } - } - - /// Provides in-place mutable access to an occupied entry before any - /// potential inserts into the map. - /// - /// # Panics - /// - /// This method panics when the node is a sentinel node - #[inline] - #[must_use] - pub fn and_modify(self, f: F) -> Self - where - F: FnOnce(&mut V), - { - match self { - Entry::Occupied(entry) => { - f(entry.map_ref.node_mut(entry.node, Node::value_mut)); - Self::Occupied(entry) - } - Entry::Vacant(entry) => Self::Vacant(entry), - } - } -} - -// TODO: better typed `Node` -/// Node of the interval tree -#[derive(Debug)] -pub struct Node { - /// Left children - left: Option>, - /// Right children - right: Option>, - /// Parent - parent: Option>, - /// Color of the node - color: Color, - - /// Interval of the node - interval: Option>, - /// The index that point to the node with the max value - max_index: Option>, - /// Value of the node - value: Option, -} - -// Convenient getter/setter methods -#[allow(clippy::missing_docs_in_private_items)] -#[allow(clippy::missing_docs_in_private_items)] // Trivial convenient methods -#[allow(clippy::unwrap_used)] // Won't panic since the conditions are checked in the implementation -impl Node -where - Ix: IndexType, -{ - fn color(&self) -> Color { - self.color - } - - fn interval(&self) -> &Interval { - self.interval.as_ref().unwrap() - } - - fn max_index(&self) -> NodeIndex { - self.max_index.unwrap() - } - - fn left(&self) -> NodeIndex { - self.left.unwrap() - } - - fn right(&self) -> NodeIndex { - self.right.unwrap() - } - - fn parent(&self) -> NodeIndex { - self.parent.unwrap() - } - - fn is_sentinel(&self) -> bool { 
- self.interval.is_none() - } - - fn sentinel(&self) -> Option<&Self> { - self.interval.is_some().then_some(self) - } - - fn is_black(&self) -> bool { - matches!(self.color, Color::Black) - } - - fn is_red(&self) -> bool { - matches!(self.color, Color::Red) - } - - fn value(&self) -> &V { - self.value.as_ref().unwrap() - } - - fn value_mut(&mut self) -> &mut V { - self.value.as_mut().unwrap() - } - - fn take_value(&mut self) -> V { - self.value.take().unwrap() - } - - fn set_value(value: V) -> impl FnOnce(&mut Node) -> V { - move |node: &mut Node| node.value.replace(value).unwrap() - } - - fn set_color(color: Color) -> impl FnOnce(&mut Node) { - move |node: &mut Node| { - node.color = color; - } - } - - fn set_max_index(max_index: NodeIndex) -> impl FnOnce(&mut Node) { - move |node: &mut Node| { - let _ignore = node.max_index.replace(max_index); - } - } - - fn set_left(left: NodeIndex) -> impl FnOnce(&mut Node) { - move |node: &mut Node| { - let _ignore = node.left.replace(left); - } - } - - fn set_right(right: NodeIndex) -> impl FnOnce(&mut Node) { - move |node: &mut Node| { - let _ignore = node.right.replace(right); - } - } - - fn set_parent(parent: NodeIndex) -> impl FnOnce(&mut Node) { - move |node: &mut Node| { - let _ignore = node.parent.replace(parent); - } - } -} - -/// The Interval stored in `IntervalMap` -/// Represents the interval [low, high) -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[non_exhaustive] -pub struct Interval { - /// Low value - pub low: T, - /// high value - pub high: T, -} - -impl Interval { - /// Creates a new `Interval` - /// - /// # Panics - /// - /// This method panics when low is greater than high - #[inline] - pub fn new(low: T, high: T) -> Self { - assert!(low < high, "invalid range"); - Self { low, high } - } - - /// Checks if self overlaps with other interval - #[inline] - pub fn overlap(&self, other: &Self) -> bool { - self.high > other.low && other.high > self.low - } -} - -/// Reference type of `Interval` 
-#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -struct IntervalRef<'a, T> { - /// Low value - low: &'a T, - /// high value - high: &'a T, -} - -impl<'a, T: Ord> IntervalRef<'a, T> { - /// Creates a new `IntervalRef` - /// - /// # Panics - /// - /// This method panics when low is greater than high - #[inline] - fn new(low: &'a T, high: &'a T) -> Self { - assert!(low < high, "invalid range"); - Self { low, high } - } - - /// Checks if self overlaps with a `Interval` - fn overlap(&self, other: &Interval) -> bool { - self.high > &other.low && &other.high > self.low - } -} - -/// The color of the node -#[derive(Debug, Clone, Copy)] -enum Color { - /// Red node - Red, - /// Black node - Black, -} diff --git a/crates/utils/src/interval_map/tests.rs b/crates/utils/src/interval_map/tests.rs deleted file mode 100644 index ca63a5c51..000000000 --- a/crates/utils/src/interval_map/tests.rs +++ /dev/null @@ -1,322 +0,0 @@ -use std::collections::HashSet; - -use rand::{rngs::StdRng, Rng, SeedableRng}; - -use super::*; - -struct IntervalGenerator { - rng: StdRng, - unique: HashSet>, - limit: i32, -} - -impl IntervalGenerator { - fn new(seed: [u8; 32]) -> Self { - const LIMIT: i32 = 1000; - Self { - rng: SeedableRng::from_seed(seed), - unique: HashSet::new(), - limit: LIMIT, - } - } - - fn next(&mut self) -> Interval { - let low = self.rng.gen_range(0..self.limit - 1); - let high = self.rng.gen_range((low + 1)..self.limit); - Interval::new(low, high) - } - - fn next_unique(&mut self) -> Interval { - let mut interval = self.next(); - while self.unique.contains(&interval) { - interval = self.next(); - } - self.unique.insert(interval.clone()); - interval - } - - fn next_with_range(&mut self, range: i32) -> Interval { - let low = self.rng.gen_range(0..self.limit - 1); - let high = self - .rng - .gen_range((low + 1)..self.limit.min(low + 1 + range)); - Interval::new(low, high) - } -} - -impl IntervalMap { - fn check_max(&self) { - let _ignore = 
self.check_max_inner(self.root); - } - - fn check_max_inner(&self, x: NodeIndex) -> i32 { - if self.node_ref(x, Node::is_sentinel) { - return 0; - } - let l_max = self.check_max_inner(self.node_ref(x, Node::left)); - let r_max = self.check_max_inner(self.node_ref(x, Node::right)); - let max = self.node_ref(x, |x| x.interval().high.max(l_max).max(r_max)); - assert_eq!(self.max(x), Some(&max)); - max - } - - /// 1. Every node is either red or black. - /// 2. The root is black. - /// 3. Every leaf (NIL) is black. - /// 4. If a node is red, then both its children are black. - /// 5. For each node, all simple paths from the node to descendant leaves contain the - /// same number of black nodes. - fn check_rb_properties(&self) { - assert!(matches!( - self.node_ref(self.root, Node::color), - Color::Black - )); - self.check_children_color(self.root); - self.check_black_height(self.root); - } - - fn check_children_color(&self, x: NodeIndex) { - if self.node_ref(x, Node::is_sentinel) { - return; - } - self.check_children_color(self.node_ref(x, Node::left)); - self.check_children_color(self.node_ref(x, Node::right)); - if self.node_ref(x, Node::is_red) { - assert!(matches!(self.left_ref(x, Node::color), Color::Black)); - assert!(matches!(self.right_ref(x, Node::color), Color::Black)); - } - } - - fn check_black_height(&self, x: NodeIndex) -> usize { - if self.node_ref(x, Node::is_sentinel) { - return 0; - } - let lefth = self.check_black_height(self.node_ref(x, Node::left)); - let righth = self.check_black_height(self.node_ref(x, Node::right)); - assert_eq!(lefth, righth); - if self.node_ref(x, Node::is_black) { - return lefth + 1; - } - lefth - } -} - -fn with_map_and_generator(test_fn: impl Fn(IntervalMap, IntervalGenerator)) { - let seeds = vec![[0; 32], [1; 32], [2; 32]]; - for seed in seeds { - let gen = IntervalGenerator::new(seed); - let map = IntervalMap::new(); - test_fn(map, gen); - } -} - -#[test] -fn red_black_tree_properties_is_satisfied() { - 
with_map_and_generator(|mut map, mut gen| { - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .take(1000) - .collect(); - for i in intervals.clone() { - let _ignore = map.insert(i, ()); - } - map.check_rb_properties(); - }); -} - -#[test] -#[should_panic(expected = "invalid range")] -fn invalid_range_should_panic() { - let _interval = Interval::new(3, 1); -} - -#[test] -fn insert_equal_interval_returns_previous_value() { - let mut map = IntervalMap::new(); - map.insert(Interval::new(1, 3), 1); - assert_eq!(map.insert(Interval::new(1, 3), 2), Some(1)); - assert_eq!(map.insert(Interval::new(1, 3), 3), Some(2)); -} - -#[test] -fn map_len_will_update() { - with_map_and_generator(|mut map, mut gen| { - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .take(100) - .collect(); - for i in intervals.clone() { - let _ignore = map.insert(i, ()); - } - assert_eq!(map.len(), 100); - for i in intervals { - let _ignore = map.remove(&i); - } - assert_eq!(map.len(), 0); - }); -} - -#[test] -fn check_overlap_is_ok_simple() { - let mut map = IntervalMap::new(); - map.insert(Interval::new(1, 3), ()); - map.insert(Interval::new(6, 7), ()); - map.insert(Interval::new(9, 11), ()); - assert!(map.overlap(&Interval::new(2, 5))); - assert!(map.overlap(&Interval::new(1, 17))); - assert!(!map.overlap(&Interval::new(4, 5))); - assert!(!map.overlap(&Interval::new(20, 23))); -} - -#[test] -fn check_overlap_is_ok() { - with_map_and_generator(|mut map, mut gen| { - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_with_range(10)) - .take(100) - .collect(); - for i in intervals.clone() { - let _ignore = map.insert(i, ()); - } - let to_check: Vec<_> = std::iter::repeat_with(|| gen.next_with_range(10)) - .take(1000) - .collect(); - let expects: Vec<_> = to_check - .iter() - .map(|ci| intervals.iter().any(|i| ci.overlap(i))) - .collect(); - - for (ci, expect) in to_check.into_iter().zip(expects.into_iter()) { - assert_eq!(map.overlap(&ci), expect); 
- } - }); -} - -#[test] -fn check_max_is_ok() { - with_map_and_generator(|mut map, mut gen| { - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .take(1000) - .collect(); - for i in intervals.clone() { - let _ignore = map.insert(i, ()); - map.check_max(); - } - assert_eq!(map.len(), 1000); - for i in intervals { - let _ignore = map.remove(&i); - map.check_max(); - } - }); -} - -#[test] -fn remove_non_exist_interval_will_do_nothing() { - with_map_and_generator(|mut map, mut gen| { - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .take(1000) - .collect(); - for i in intervals { - let _ignore = map.insert(i, ()); - } - assert_eq!(map.len(), 1000); - let to_remove: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .take(1000) - .collect(); - for i in to_remove { - let _ignore = map.remove(&i); - } - assert_eq!(map.len(), 1000); - }); -} - -#[test] -fn find_all_overlap_is_ok_simple() { - let mut map = IntervalMap::new(); - map.insert(Interval::new(1, 3), ()); - map.insert(Interval::new(2, 4), ()); - map.insert(Interval::new(6, 7), ()); - map.insert(Interval::new(7, 11), ()); - assert_eq!(map.find_all_overlap(&Interval::new(2, 7)).len(), 3); - map.remove(&Interval::new(1, 3)); - assert_eq!(map.find_all_overlap(&Interval::new(2, 7)).len(), 2); -} - -#[test] -fn find_all_overlap_is_ok() { - with_map_and_generator(|mut map, mut gen| { - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .take(1000) - .collect(); - for i in intervals.clone() { - let _ignore = map.insert(i, ()); - } - let to_find: Vec<_> = std::iter::repeat_with(|| gen.next()).take(1000).collect(); - - let expects: Vec> = to_find - .iter() - .map(|ti| intervals.iter().filter(|i| ti.overlap(i)).collect()) - .collect(); - - for (ti, mut expect) in to_find.into_iter().zip(expects.into_iter()) { - let mut result = map.find_all_overlap(&ti); - expect.sort_unstable(); - result.sort_unstable(); - assert_eq!(expect.len(), result.len()); - for (e, 
r) in expect.into_iter().zip(result.into_iter()) { - assert_eq!(e, r.0); - } - } - }); -} - -#[test] -fn entry_modify_is_ok() { - let mut map = IntervalMap::new(); - map.insert(Interval::new(1, 3), 1); - map.insert(Interval::new(2, 4), 2); - map.insert(Interval::new(6, 7), 3); - map.insert(Interval::new(7, 11), 4); - let _ignore = map.entry(Interval::new(6, 7)).and_modify(|v| *v += 1); - assert_eq!(map.get(&Interval::new(1, 3)), Some(&1)); - assert_eq!(map.get(&Interval::new(2, 4)), Some(&2)); - assert_eq!(map.get(&Interval::new(6, 7)), Some(&4)); - assert_eq!(map.get(&Interval::new(7, 11)), Some(&4)); - assert_eq!(map.get(&Interval::new(5, 17)), None); - map.entry(Interval::new(3, 5)) - .and_modify(|v| *v += 1) - .or_insert(0); - let _ignore = map.get_mut(&Interval::new(3, 5)).map(|v| *v += 1); - assert_eq!(map.get(&Interval::new(3, 5)), Some(&1)); -} - -#[test] -fn iterate_through_map_is_sorted() { - with_map_and_generator(|mut map, mut gen| { - let mut intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .enumerate() - .take(1000) - .collect(); - for (v, i) in intervals.clone() { - let _ignore = map.insert(i, v); - } - intervals.sort_unstable_by(|a, b| a.1.cmp(&b.1)); - - #[allow(clippy::pattern_type_mismatch)] - for ((ei, ev), (v, i)) in map.iter().zip(intervals.iter()) { - assert_eq!(ei, i); - assert_eq!(ev, v); - } - }); -} - -#[test] -fn interval_map_clear_is_ok() { - let mut map = IntervalMap::new(); - map.insert(Interval::new(1, 3), 1); - map.insert(Interval::new(2, 4), 2); - map.insert(Interval::new(6, 7), 3); - assert_eq!(map.len(), 3); - map.clear(); - assert_eq!(map.len(), 0); - assert!(map.is_empty()); - assert_eq!(map.nodes.len(), 1); - assert!(map.nodes[0].is_sentinel()); -} diff --git a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs index 44fbdbf2e..de346a983 100644 --- a/crates/utils/src/lib.rs +++ b/crates/utils/src/lib.rs @@ -188,8 +188,6 @@ pub struct ServerTlsConfig; pub mod barrier; /// configuration pub mod config; -/// 
Interval tree implementation -pub mod interval_map; /// utils for metrics pub mod metrics; /// utils of `parking_lot` lock @@ -211,6 +209,8 @@ pub mod tokio_lock; pub mod tracing; use ::tracing::debug; +/// Interval tree implementation +pub use interval_map; pub use parser::*; use pbkdf2::{ password_hash::{rand_core::OsRng, PasswordHasher, SaltString}, diff --git a/crates/xline/src/conflict/spec_pool.rs b/crates/xline/src/conflict/spec_pool.rs index 8bcfb41ec..b87375b16 100644 --- a/crates/xline/src/conflict/spec_pool.rs +++ b/crates/xline/src/conflict/spec_pool.rs @@ -70,7 +70,7 @@ impl ConflictPoolOp for KvSpecPool { impl SpeculativePoolOp for KvSpecPool { fn insert_if_not_conflict(&mut self, entry: Self::Entry) -> Option { let intervals = intervals(&self.lease_collection, &entry); - if intervals.iter().any(|i| self.map.overlap(i)) { + if intervals.iter().any(|i| self.map.overlaps(i)) { return Some(entry); } assert!( diff --git a/crates/xline/src/conflict/uncommitted_pool.rs b/crates/xline/src/conflict/uncommitted_pool.rs index ba02ed5ca..d9921e6c2 100644 --- a/crates/xline/src/conflict/uncommitted_pool.rs +++ b/crates/xline/src/conflict/uncommitted_pool.rs @@ -85,7 +85,7 @@ impl UncommittedPoolOp for KvUncomPool { fn insert(&mut self, entry: Self::Entry) -> bool { let intervals = intervals(&self.lease_collection, &entry); let _ignore = self.intervals.insert(entry.id(), intervals.clone()); - let conflict = intervals.iter().any(|i| self.map.overlap(i)); + let conflict = intervals.iter().any(|i| self.map.overlaps(i)); for interval in intervals { let e = self.map.entry(interval).or_insert(Commands::default()); e.push_cmd(entry.clone()); diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 69ae00c51..92723c697 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -30,8 +30,8 @@ madsim-tokio = { git = "https://github.com/LucienY01/madsim.git", branch = "bz/t madsim-tonic = { git = "https://github.com/LucienY01/madsim.git", 
branch = "bz/tonic-0-12", default-features = false, features = ["tls"] } memchr = { version = "2" } opentelemetry_sdk = { version = "0.24", features = ["rt-tokio"] } -petgraph = { version = "0.6" } predicates = { version = "3", default-features = false, features = ["diff"] } +rand = { version = "0.8", features = ["small_rng"] } serde = { version = "1", features = ["derive", "rc"] } serde_json = { version = "1", features = ["raw_value"] } sha2 = { version = "0.10" } @@ -55,7 +55,6 @@ itertools = { version = "0.13" } libc = { version = "0.2", features = ["extra_traits"] } log = { version = "0.4", default-features = false, features = ["std"] } memchr = { version = "2" } -petgraph = { version = "0.6" } predicates = { version = "3", default-features = false, features = ["diff"] } syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit", "visit-mut"] } From 878a2022f3ca0e2b14fa62f356524c196442f97f Mon Sep 17 00:00:00 2001 From: feathercyc Date: Sun, 25 Aug 2024 11:45:07 +0000 Subject: [PATCH 050/322] fix: correct request validation Signed-off-by: feathercyc --- crates/utils/src/lca_tree.rs | 175 ++++++++++++++++++++++ crates/utils/src/lib.rs | 2 + crates/xlineapi/src/request_validation.rs | 149 +++++++++++++----- 3 files changed, 287 insertions(+), 39 deletions(-) create mode 100644 crates/utils/src/lca_tree.rs diff --git a/crates/utils/src/lca_tree.rs b/crates/utils/src/lca_tree.rs new file mode 100644 index 000000000..9e76ad135 --- /dev/null +++ b/crates/utils/src/lca_tree.rs @@ -0,0 +1,175 @@ +use std::ops::{Add, Sub as _}; + +/// A LCA tree to accelerate Txns' key overlap validation +#[non_exhaustive] +#[derive(Debug)] +pub struct LCATree { + /// + nodes: Vec, +} + +/// +#[non_exhaustive] +#[derive(Debug)] +pub struct LCANode { + /// + pub parent: Vec, + /// + pub depth: usize, +} + +#[allow(clippy::indexing_slicing)] +impl 
LCATree { + /// build a `LCATree` with a sentinel node + #[inline] + #[must_use] + pub fn new() -> Self { + Self { + nodes: vec![LCANode { + parent: vec![0], + depth: 0, + }], + } + } + /// get a node by index + /// + /// # Panics + /// + /// The function panics if given `i` > max index + #[inline] + #[must_use] + pub fn get_node(&self, i: usize) -> &LCANode { + assert!(i < self.nodes.len(), "Node {i} doesn't exist"); + &self.nodes[i] + } + /// insert a node and return its index + /// + /// # Panics + /// + /// The function panics if given `parent` doesn't exist + #[inline] + #[must_use] + #[allow(clippy::as_conversions)] + pub fn insert_node(&mut self, parent: usize) -> usize { + let depth = if parent == 0 { + 0 + } else { + self.get_node(parent).depth.add(1) + }; + let mut node = LCANode { + parent: vec![], + depth, + }; + node.parent.push(parent); + let parent_num = if depth == 0 { 0 } else { depth.ilog2() } as usize; + for i in 0..parent_num { + node.parent.push(self.get_node(node.parent[i]).parent[i]); + } + self.nodes.push(node); + self.nodes.len().sub(1) + } + /// Use Binary Lifting to find the LCA of `node_a` and `node_b` + /// + /// # Panics + /// + /// The function panics if given `node_a` or `node_b` doesn't exist + #[inline] + #[must_use] + pub fn find_lca(&self, node_a: usize, node_b: usize) -> usize { + let (mut x, mut y) = if self.get_node(node_a).depth < self.get_node(node_b).depth { + (node_a, node_b) + } else { + (node_b, node_a) + }; + while self.get_node(x).depth < self.get_node(y).depth { + for ancestor in self.get_node(y).parent.iter().rev() { + if self.get_node(x).depth <= self.get_node(*ancestor).depth { + y = *ancestor; + } + } + } + while x != y { + let node_x = self.get_node(x); + let node_y = self.get_node(y); + if node_x.parent[0] == node_y.parent[0] { + x = node_x.parent[0]; + break; + } + for i in (0..node_x.parent.len()).rev() { + if node_x.parent[i] != node_y.parent[i] { + x = node_x.parent[i]; + y = node_y.parent[i]; + break; + } + 
} + } + x + } +} + +impl Default for LCATree { + #[inline] + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod test { + use crate::lca_tree::LCATree; + + #[test] + fn test_ilog2() { + assert_eq!(3_i32.ilog2(), 1); + assert_eq!(5_i32.ilog2(), 2); + assert_eq!(7_i32.ilog2(), 2); + assert_eq!(10_i32.ilog2(), 3); + } + + #[test] + // root + // / | \ + // / | \ + // / | \ + // node1 node2 node3 + // | \ | | + // | \ | | + // node4 node5 node6 node7 + // | \ \ + // | \ node10 + // node8 node9 + // + // + fn test_lca() { + let mut tree = LCATree::new(); + let root = 0; + let node1 = tree.insert_node(root); + let node2 = tree.insert_node(root); + let node3 = tree.insert_node(root); + + let node4 = tree.insert_node(node1); + let node5 = tree.insert_node(node1); + + let node6 = tree.insert_node(node2); + + let node7 = tree.insert_node(node3); + + let node8 = tree.insert_node(node4); + let node9 = tree.insert_node(node4); + + let node10 = tree.insert_node(node5); + + assert_eq!(tree.find_lca(node1, node2), root); + assert_eq!(tree.find_lca(node1, node3), root); + assert_eq!(tree.find_lca(node1, node4), node1); + assert_eq!(tree.find_lca(node4, node5), node1); + assert_eq!(tree.find_lca(node5, node7), root); + assert_eq!(tree.find_lca(node6, node7), root); + assert_eq!(tree.find_lca(node8, node9), node4); + assert_eq!(tree.find_lca(node8, node10), node1); + assert_eq!(tree.find_lca(node6, node10), root); + assert_eq!(tree.find_lca(node8, node5), node1); + assert_eq!(tree.find_lca(node9, node3), root); + assert_eq!(tree.find_lca(node10, node2), root); + } +} diff --git a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs index de346a983..88aca819a 100644 --- a/crates/utils/src/lib.rs +++ b/crates/utils/src/lib.rs @@ -188,6 +188,8 @@ pub struct ServerTlsConfig; pub mod barrier; /// configuration pub mod config; +/// LCA tree implementation +pub mod lca_tree; /// utils for metrics pub mod metrics; /// utils of `parking_lot` lock diff --git 
a/crates/xlineapi/src/request_validation.rs b/crates/xlineapi/src/request_validation.rs index ff6ff9a86..a85ce07be 100644 --- a/crates/xlineapi/src/request_validation.rs +++ b/crates/xlineapi/src/request_validation.rs @@ -1,10 +1,12 @@ -use std::collections::HashSet; +use std::collections::{hash_map::Entry, HashMap}; use serde::{Deserialize, Serialize}; use thiserror::Error; +use utils::interval_map::{Interval, IntervalMap}; +use utils::lca_tree::LCATree; use crate::{ - command::KeyRange, AuthRoleAddRequest, AuthRoleGrantPermissionRequest, AuthUserAddRequest, + interval::BytesAffine, AuthRoleAddRequest, AuthRoleGrantPermissionRequest, AuthUserAddRequest, DeleteRangeRequest, PutRequest, RangeRequest, Request, RequestOp, SortOrder, SortTarget, TxnRequest, }; @@ -85,61 +87,133 @@ impl RequestValidator for TxnRequest { } } - let _ignore_success = check_intervals(&self.success)?; - let _ignore_failure = check_intervals(&self.failure)?; + check_intervals(&self.success)?; + check_intervals(&self.failure)?; Ok(()) } } -/// Check if puts and deletes overlap -fn check_intervals(ops: &[RequestOp]) -> Result<(HashSet<&[u8]>, Vec), ValidationError> { - // TODO: use interval tree is better? 
+type DelsIntervalMap<'a> = IntervalMap>; - let mut dels = Vec::new(); +fn new_bytes_affine_interval(start: &[u8], key_end: &[u8]) -> Interval { + let high = match key_end { + &[] => { + let mut end = start.to_vec(); + end.push(0); + BytesAffine::Bytes(end) + } + &[0] => BytesAffine::Unbounded, + bytes => BytesAffine::Bytes(bytes.to_vec()), + }; + Interval::new(BytesAffine::new_key(start), high) +} - for op in ops { - if let Some(Request::RequestDeleteRange(ref req)) = op.request { - // collect dels - let del = KeyRange::new(req.key.as_slice(), req.range_end.as_slice()); - dels.push(del); +/// Check if puts and deletes overlap +fn check_intervals(ops: &[RequestOp]) -> Result<(), ValidationError> { + let mut lca_tree = LCATree::new(); + // Because `dels` stores Vec corresponding to the interval, merging two `dels` is slightly cumbersome. + // Here, `dels` are directly passed into the build function + let mut dels = DelsIntervalMap::new(); + // This function will traverse all RequestOp and collect all the parent nodes corresponding to `put` and `del` operations. + // During this process, the atomicity of the put operation can be guaranteed. + let puts = build_interval_tree(ops, &mut dels, &mut lca_tree, 0)?; + + // Now we have `dels` and `puts` which contain all node index corresponding to `del` and `put` ops, + // we only need to iterate through the puts to find out whether each put overlaps with the del operation in the dels, + // and even if it overlaps, whether it satisfies lca.depth % 2 == 0. + for (put_key, put_vec) in puts { + let put_interval = new_bytes_affine_interval(put_key, &[]); + let overlaps = dels.find_all_overlap(&put_interval); + for put_node_idx in put_vec { + for (_, del_vec) in overlaps.iter() { + for del_node_idx in del_vec.iter() { + let lca_node_idx = lca_tree.find_lca(put_node_idx, *del_node_idx); + // lca.depth % 2 == 0 means this lca is on a success or failure branch, + // and two nodes on the same branch are prohibited from overlapping. 
+ if lca_tree.get_node(lca_node_idx).depth % 2 == 0 { + return Err(ValidationError::DuplicateKey); + } + } + } } } - let mut puts: HashSet<&[u8]> = HashSet::new(); + Ok(()) +} +fn build_interval_tree<'a>( + ops: &'a [RequestOp], + dels_map: &mut DelsIntervalMap<'a>, + lca_tree: &mut LCATree, + parent: usize, +) -> Result>, ValidationError> { + let mut puts_map: HashMap<&[u8], Vec> = HashMap::new(); for op in ops { - if let Some(Request::RequestTxn(ref req)) = op.request { - // handle child txn request - let (success_puts, mut success_dels) = check_intervals(&req.success)?; - let (failure_puts, mut failure_dels) = check_intervals(&req.failure)?; - - for k in success_puts.union(&failure_puts) { - if !puts.insert(k) { - return Err(ValidationError::DuplicateKey); + match op.request { + Some(Request::RequestDeleteRange(ref req)) => { + // collect dels + let cur_node_idx = lca_tree.insert_node(parent); + let del = new_bytes_affine_interval(req.key.as_slice(), req.range_end.as_slice()); + dels_map.entry(del).or_insert(vec![]).push(cur_node_idx); + } + Some(Request::RequestTxn(ref req)) => { + // RequestTxn is absolutely a node + let cur_node_idx = lca_tree.insert_node(parent); + let success_puts_map = if !req.success.is_empty() { + // success branch is also a node + let success_node_idx = lca_tree.insert_node(cur_node_idx); + build_interval_tree(&req.success, dels_map, lca_tree, success_node_idx)? + } else { + HashMap::new() + }; + let failure_puts_map = if !req.failure.is_empty() { + // failure branch is also a node + let failure_node_idx = lca_tree.insert_node(cur_node_idx); + build_interval_tree(&req.failure, dels_map, lca_tree, failure_node_idx)? + } else { + HashMap::new() + }; + // success_puts_map and failure_puts_map cannot overlap with other op's puts_map. 
+ for (sub_put_key, sub_put_node_idx) in success_puts_map.iter() { + if puts_map.contains_key(sub_put_key) { + return Err(ValidationError::DuplicateKey); + } + puts_map.insert(&sub_put_key, sub_put_node_idx.to_vec()); } - if dels.iter().any(|del| del.contains_key(k)) { - return Err(ValidationError::DuplicateKey); + // but they can overlap with each other + for (sub_put_key, mut sub_put_node_idx) in failure_puts_map.into_iter() { + match puts_map.entry(&sub_put_key) { + Entry::Vacant(_) => { + puts_map.insert(&sub_put_key, sub_put_node_idx); + } + Entry::Occupied(mut put_entry) => { + if !success_puts_map.contains_key(sub_put_key) { + return Err(ValidationError::DuplicateKey); + } + let put_vec = put_entry.get_mut(); + put_vec.append(&mut sub_put_node_idx); + } + }; } } - - dels.append(&mut success_dels); - dels.append(&mut failure_dels); + _ => {} } } - + // put in RequestPut cannot overlap with all puts in RequestTxn for op in ops { - if let Some(Request::RequestPut(ref req)) = op.request { - // check puts in this level - if !puts.insert(&req.key) { - return Err(ValidationError::DuplicateKey); - } - if dels.iter().any(|del| del.contains_key(&req.key)) { - return Err(ValidationError::DuplicateKey); + match op.request { + Some(Request::RequestPut(ref req)) => { + if puts_map.contains_key(&req.key.as_slice()) { + return Err(ValidationError::DuplicateKey); + } + let cur_node_idx = lca_tree.insert_node(parent); + puts_map.insert(&req.key, vec![cur_node_idx]); } + _ => {} } } - Ok((puts, dels)) + Ok(puts_map) } impl RequestValidator for AuthUserAddRequest { @@ -583,9 +657,6 @@ mod test { run_test(testcases); } - // FIXME: This test will fail in the current implementation. 
- // See https://github.com/xline-kv/Xline/issues/410 for more details - #[ignore] #[test] fn check_intervals_txn_nested_overlap_should_return_error() { let put_op = RequestOp { From d4eeafc42dbecc01df83bbad00e4b4a08d5b317a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 23 Aug 2024 05:57:58 +0000 Subject: [PATCH 051/322] chore(deps): bump crate-ci/typos from 1.23.6 to 1.23.7 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.23.6 to 1.23.7. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.23.6...v1.23.7) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 2d68484c1..ebea9fcad 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -128,7 +128,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Check Spelling - uses: crate-ci/typos@v1.23.6 + uses: crate-ci/typos@v1.23.7 build: name: Build From 617e341a0bb9315e369e045facde29009c958e6e Mon Sep 17 00:00:00 2001 From: lxl66566 Date: Thu, 18 Jul 2024 09:37:24 +0800 Subject: [PATCH 052/322] refactor(client)!: AuthClient::role_revoke_permission Signed-off-by: lxl66566 fix: add test Signed-off-by: lxl66566 fix(client): fix doc test Signed-off-by: lxl66566 --- crates/xline-client/examples/auth_role.rs | 13 +--- crates/xline-client/src/clients/auth.rs | 33 +++++++--- crates/xline-client/src/types/auth.rs | 65 ------------------- crates/xline-client/tests/it/auth.rs | 18 ++--- .../xlinectl/src/command/role/revoke_perm.rs | 25 +++++-- 5 files changed, 50 insertions(+), 104 
deletions(-) diff --git a/crates/xline-client/examples/auth_role.rs b/crates/xline-client/examples/auth_role.rs index a23e686a8..fe09d34ac 100644 --- a/crates/xline-client/examples/auth_role.rs +++ b/crates/xline-client/examples/auth_role.rs @@ -1,8 +1,5 @@ use anyhow::Result; -use xline_client::{ - types::auth::{AuthRoleRevokePermissionRequest, PermissionType}, - Client, ClientOptions, -}; +use xline_client::{types::auth::PermissionType, Client, ClientOptions}; #[tokio::main] async fn main() -> Result<()> { @@ -38,12 +35,8 @@ async fn main() -> Result<()> { } // revoke permissions from roles - client - .role_revoke_permission(AuthRoleRevokePermissionRequest::new("role1", "key1")) - .await?; - client - .role_revoke_permission(AuthRoleRevokePermissionRequest::new("role2", "key2")) - .await?; + client.role_revoke_permission("role1", "key1", None).await?; + client.role_revoke_permission("role2", "key2", None).await?; // delete roles client.role_delete("role1").await?; diff --git a/crates/xline-client/src/clients/auth.rs b/crates/xline-client/src/clients/auth.rs index 44038cc28..e786f4cd6 100644 --- a/crates/xline-client/src/clients/auth.rs +++ b/crates/xline-client/src/clients/auth.rs @@ -9,14 +9,12 @@ use xlineapi::{ AuthUserAddResponse, AuthUserChangePasswordResponse, AuthUserDeleteResponse, AuthUserGetResponse, AuthUserGrantRoleResponse, AuthUserListResponse, AuthUserRevokeRoleResponse, AuthenticateResponse, RequestWrapper, ResponseWrapper, + Type as PermissionType, }; use crate::{ error::{Result, XlineClientError}, - types::{ - auth::{AuthRoleRevokePermissionRequest, Permission, PermissionType}, - range_end::RangeOption, - }, + types::{auth::Permission, range_end::RangeOption}, AuthService, CurpClient, }; @@ -717,9 +715,7 @@ impl AuthClient { /// # Examples /// /// ```no_run - /// use xline_client::{ - /// types::auth::AuthRoleRevokePermissionRequest, Client, ClientOptions, - /// }; + /// use xline_client::{Client, ClientOptions, types::range_end::RangeOption}; 
/// use anyhow::Result; /// /// #[tokio::main] @@ -732,8 +728,13 @@ impl AuthClient { /// /// // grant the role /// + /// client.role_revoke_permission("role", "key", None).await?; /// client - /// .role_revoke_permission(AuthRoleRevokePermissionRequest::new("role", "key")) + /// .role_revoke_permission( + /// "role2", + /// "hi", + /// Some(RangeOption::RangeEnd("hjj".into())), + /// ) /// .await?; /// /// Ok(()) @@ -742,9 +743,21 @@ impl AuthClient { #[inline] pub async fn role_revoke_permission( &self, - request: AuthRoleRevokePermissionRequest, + name: impl Into, + key: impl Into>, + range_option: Option, ) -> Result { - self.handle_req(request.inner, false).await + let mut key = key.into(); + let range_end = range_option.unwrap_or_default().get_range_end(&mut key); + self.handle_req( + xlineapi::AuthRoleRevokePermissionRequest { + role: name.into(), + key, + range_end, + }, + false, + ) + .await } /// Send request using fast path diff --git a/crates/xline-client/src/types/auth.rs b/crates/xline-client/src/types/auth.rs index 87291ee57..a025d7323 100644 --- a/crates/xline-client/src/types/auth.rs +++ b/crates/xline-client/src/types/auth.rs @@ -1,4 +1,3 @@ -use xlineapi::command::KeyRange; pub use xlineapi::{ AuthDisableResponse, AuthEnableResponse, AuthRoleAddResponse, AuthRoleDeleteResponse, AuthRoleGetResponse, AuthRoleGrantPermissionResponse, AuthRoleListResponse, @@ -10,70 +9,6 @@ pub use xlineapi::{ use super::range_end::RangeOption; -/// Request for `AuthRoleRevokePermission` -#[derive(Debug, PartialEq)] -pub struct AuthRoleRevokePermissionRequest { - /// Inner request - pub(crate) inner: xlineapi::AuthRoleRevokePermissionRequest, -} - -impl AuthRoleRevokePermissionRequest { - /// Creates a new `RoleRevokePermissionOption` from pb role revoke permission. - /// - /// `role` is the name of the role to revoke permission, - /// `key` is the key to revoke from the role. 
- #[inline] - pub fn new(role: impl Into, key: impl Into>) -> Self { - Self { - inner: xlineapi::AuthRoleRevokePermissionRequest { - role: role.into(), - key: key.into(), - ..Default::default() - }, - } - } - - /// If set, Xline will return all keys with the matching prefix - #[inline] - #[must_use] - pub fn with_prefix(mut self) -> Self { - if self.inner.key.is_empty() { - self.inner.key = vec![0]; - self.inner.range_end = vec![0]; - } else { - self.inner.range_end = KeyRange::get_prefix(&self.inner.key); - } - self - } - - /// If set, Xline will return all keys that are equal or greater than the given key - #[inline] - #[must_use] - pub fn with_from_key(mut self) -> Self { - if self.inner.key.is_empty() { - self.inner.key = vec![0]; - } - self.inner.range_end = vec![0]; - self - } - - /// `range_end` is the upper bound on the requested range \[key,` range_en`d). - /// If `range_end` is '\0', the range is all keys >= key. - #[inline] - #[must_use] - pub fn with_range_end(mut self, range_end: impl Into>) -> Self { - self.inner.range_end = range_end.into(); - self - } -} - -impl From for xlineapi::AuthRoleRevokePermissionRequest { - #[inline] - fn from(req: AuthRoleRevokePermissionRequest) -> Self { - req.inner - } -} - /// Role access permission. 
#[derive(Debug, Clone)] pub struct Permission { diff --git a/crates/xline-client/tests/it/auth.rs b/crates/xline-client/tests/it/auth.rs index ecd77e5b0..da32304c2 100644 --- a/crates/xline-client/tests/it/auth.rs +++ b/crates/xline-client/tests/it/auth.rs @@ -2,7 +2,7 @@ use xline_client::{ error::Result, types::{ - auth::{AuthRoleRevokePermissionRequest, Permission, PermissionType}, + auth::{Permission, PermissionType}, range_end::RangeOption, }, }; @@ -79,24 +79,18 @@ async fn permission_operations_should_success_in_normal_path() -> Result<()> { } // revoke all permission + client.role_revoke_permission(role1, "123", None).await?; client - .role_revoke_permission(AuthRoleRevokePermissionRequest::new(role1, "123")) + .role_revoke_permission(role1, "abc", Some(RangeOption::FromKey)) .await?; client - .role_revoke_permission(AuthRoleRevokePermissionRequest::new(role1, "abc").with_from_key()) + .role_revoke_permission(role1, "hi", Some(RangeOption::RangeEnd("hjj".into()))) .await?; client - .role_revoke_permission( - AuthRoleRevokePermissionRequest::new(role1, "hi").with_range_end("hjj"), - ) + .role_revoke_permission(role1, "pp", Some(RangeOption::Prefix)) .await?; client - .role_revoke_permission(AuthRoleRevokePermissionRequest::new(role1, "pp").with_prefix()) - .await?; - client - .role_revoke_permission( - AuthRoleRevokePermissionRequest::new(role1, vec![0]).with_from_key(), - ) + .role_revoke_permission(role1, vec![0], Some(RangeOption::FromKey)) .await?; let role_get_resp = client.role_get(role1).await?; diff --git a/crates/xlinectl/src/command/role/revoke_perm.rs b/crates/xlinectl/src/command/role/revoke_perm.rs index 8ba5c2071..8973c605b 100644 --- a/crates/xlinectl/src/command/role/revoke_perm.rs +++ b/crates/xlinectl/src/command/role/revoke_perm.rs @@ -1,8 +1,11 @@ use clap::{arg, ArgMatches, Command}; -use xline_client::{error::Result, types::auth::AuthRoleRevokePermissionRequest, Client}; +use xline_client::{error::Result, types::range_end::RangeOption, 
Client}; use crate::utils::printer::Printer; +/// Temp request type for `revoke_perm` command +type AuthRoleRevokePermissionRequest = (String, Vec, Option); + /// Definition of `revoke_perm` command pub(super) fn command() -> Command { Command::new("revoke_perm") @@ -18,19 +21,23 @@ pub(super) fn build_request(matches: &ArgMatches) -> AuthRoleRevokePermissionReq let key = matches.get_one::("key").expect("required"); let range_end = matches.get_one::("range_end"); - let mut request = AuthRoleRevokePermissionRequest::new(name, key.as_bytes()); + let key = key.as_bytes().to_vec(); + let mut option = None; if let Some(range_end) = range_end { - request = request.with_range_end(range_end.as_bytes()); + option = Some(RangeOption::RangeEnd(range_end.as_bytes().to_vec())); }; - request + (name.into(), key, option) } /// Execute the command pub(super) async fn execute(client: &mut Client, matches: &ArgMatches) -> Result<()> { let req = build_request(matches); - let resp = client.auth_client().role_revoke_permission(req).await?; + let resp = client + .auth_client() + .role_revoke_permission(req.0, req.1, req.2) + .await?; resp.print(); Ok(()) @@ -48,11 +55,15 @@ mod tests { let test_cases = vec![ TestCase::new( vec!["revoke_perm", "Admin", "key1", "key2"], - Some(AuthRoleRevokePermissionRequest::new("Admin", "key1").with_range_end("key2")), + Some(( + "Admin".into(), + "key1".into(), + Some(RangeOption::RangeEnd("key2".into())), + )), ), TestCase::new( vec!["revoke_perm", "Admin", "key3"], - Some(AuthRoleRevokePermissionRequest::new("Admin", "key3")), + Some(("Admin".into(), "key3".into(), None)), ), ]; From 7d0bc8f5fe6f4a594a778253559729a2c791d2c0 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 26 Aug 2024 11:22:00 +0800 Subject: [PATCH 053/322] chore: remove unused index from lease_store Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/src/server/xline_server.rs | 3 +- 
crates/xline/src/storage/lease_store/mod.rs | 36 ++++++++++----------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/crates/xline/src/server/xline_server.rs b/crates/xline/src/server/xline_server.rs index a4b663689..de40466c5 100644 --- a/crates/xline/src/server/xline_server.rs +++ b/crates/xline/src/server/xline_server.rs @@ -228,7 +228,7 @@ impl XlineServer { self.task_manager.spawn(TaskName::CompactBg, |n| { compact_bg_task( Arc::clone(&kv_storage), - Arc::clone(&index), + index, *self.compact_config.compact_batch_size(), *self.compact_config.compact_sleep_interval(), compact_task_rx, @@ -239,7 +239,6 @@ impl XlineServer { Arc::clone(&lease_collection), Arc::clone(&header_gen), Arc::clone(&db), - index, kv_update_tx, *self.cluster_config.is_leader(), )); diff --git a/crates/xline/src/storage/lease_store/mod.rs b/crates/xline/src/storage/lease_store/mod.rs index 7aab4a111..a6ff9c26a 100644 --- a/crates/xline/src/storage/lease_store/mod.rs +++ b/crates/xline/src/storage/lease_store/mod.rs @@ -30,7 +30,7 @@ use xlineapi::{ pub(crate) use self::{lease::Lease, lease_collection::LeaseCollection}; use super::{ db::{WriteOp, DB}, - index::{Index, IndexOperate}, + index::IndexOperate, storage_api::XlineStorageOps, }; use crate::{ @@ -54,9 +54,6 @@ pub(crate) struct LeaseStore { lease_collection: Arc, /// Db to store lease db: Arc, - #[allow(unused)] // used in tests - /// Key to revision index - index: Arc, /// Header generator header_gen: Arc, /// KV update sender @@ -75,14 +72,12 @@ impl LeaseStore { lease_collection: Arc, header_gen: Arc, db: Arc, - index: Arc, kv_update_tx: flume::Sender<(i64, Vec)>, is_leader: bool, ) -> Self { Self { lease_collection, db, - index, header_gen, kv_update_tx, is_primary: AtomicBool::new(is_leader), @@ -394,18 +389,23 @@ mod test { use super::*; use crate::{ revision_number::RevisionNumberGenerator, - storage::{db::DB, storage_api::XlineStorageOps}, + storage::{ + db::DB, + index::{Index, IndexState}, + 
storage_api::XlineStorageOps, + }, }; #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn test_lease_storage() -> Result<(), Box> { let db = DB::open(&EngineConfig::Memory)?; + let index = Index::new(); let (lease_store, rev_gen) = init_store(db); let rev_gen_state = rev_gen.state(); let req1 = RequestWrapper::from(LeaseGrantRequest { ttl: 10, id: 1 }); - let _ignore1 = exe_and_sync_req(&lease_store, &req1, &rev_gen_state)?; + let _ignore1 = exe_and_sync_req(&lease_store, index.state(), &req1, &rev_gen_state)?; let lo = lease_store.look_up(1).unwrap(); assert_eq!(lo.id(), 1); @@ -419,7 +419,7 @@ mod test { lease_store.lease_collection.detach(1, "key".as_bytes())?; let req2 = RequestWrapper::from(LeaseRevokeRequest { id: 1 }); - let _ignore2 = exe_and_sync_req(&lease_store, &req2, &rev_gen_state)?; + let _ignore2 = exe_and_sync_req(&lease_store, index.state(), &req2, &rev_gen_state)?; assert!(lease_store.look_up(1).is_none()); assert!(lease_store.leases().is_empty()); @@ -427,9 +427,9 @@ mod test { let req4 = RequestWrapper::from(LeaseGrantRequest { ttl: 10, id: 4 }); let req5 = RequestWrapper::from(LeaseRevokeRequest { id: 3 }); let req6 = RequestWrapper::from(LeaseLeasesRequest {}); - let _ignore3 = exe_and_sync_req(&lease_store, &req3, &rev_gen_state)?; - let _ignore4 = exe_and_sync_req(&lease_store, &req4, &rev_gen_state)?; - let resp_1 = exe_and_sync_req(&lease_store, &req6, &rev_gen_state)?; + let _ignore3 = exe_and_sync_req(&lease_store, index.state(), &req3, &rev_gen_state)?; + let _ignore4 = exe_and_sync_req(&lease_store, index.state(), &req4, &rev_gen_state)?; + let resp_1 = exe_and_sync_req(&lease_store, index.state(), &req6, &rev_gen_state)?; let ResponseWrapper::LeaseLeasesResponse(leases_1) = resp_1 else { panic!("wrong response type: {resp_1:?}"); @@ -437,8 +437,8 @@ mod test { assert_eq!(leases_1.leases[0].id, 3); assert_eq!(leases_1.leases[1].id, 4); - let _ignore5 = exe_and_sync_req(&lease_store, &req5, &rev_gen_state)?; - let resp_2 
= exe_and_sync_req(&lease_store, &req6, &rev_gen_state)?; + let _ignore5 = exe_and_sync_req(&lease_store, index.state(), &req5, &rev_gen_state)?; + let resp_2 = exe_and_sync_req(&lease_store, index.state(), &req6, &rev_gen_state)?; let ResponseWrapper::LeaseLeasesResponse(leases_2) = resp_2 else { panic!("wrong response type: {resp_2:?}"); }; @@ -505,11 +505,12 @@ mod test { #[abort_on_panic] async fn test_recover() -> Result<(), ExecuteError> { let db = DB::open(&EngineConfig::Memory)?; + let index = Index::new(); let (store, rev_gen) = init_store(Arc::clone(&db)); let rev_gen_state = rev_gen.state(); let req1 = RequestWrapper::from(LeaseGrantRequest { ttl: 10, id: 1 }); - let _ignore1 = exe_and_sync_req(&store, &req1, &rev_gen_state)?; + let _ignore1 = exe_and_sync_req(&store, index.state(), &req1, &rev_gen_state)?; store.lease_collection.attach(1, "key".into())?; let (new_store, _) = init_store(db); @@ -531,21 +532,20 @@ mod test { let lease_collection = Arc::new(LeaseCollection::new(0)); let (kv_update_tx, _) = flume::bounded(1); let header_gen = Arc::new(HeaderGenerator::new(0, 0)); - let index = Arc::new(Index::new()); ( - LeaseStore::new(lease_collection, header_gen, db, index, kv_update_tx, true), + LeaseStore::new(lease_collection, header_gen, db, kv_update_tx, true), RevisionNumberGenerator::new(1), ) } fn exe_and_sync_req( ls: &LeaseStore, + index: IndexState, req: &RequestWrapper, rev_gen: &RevisionNumberGeneratorState<'_>, ) -> Result { let cmd_res = ls.execute(req)?; let txn = ls.db.transaction(); - let index = ls.index.state(); let (_ignore, _ops) = ls.after_sync(req, rev_gen, &txn, &index)?; txn.commit() .map_err(|e| ExecuteError::DbError(e.to_string()))?; From 6f909f5592c7c18ea2b098a05e07c541e7f6c035 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:13:44 +0800 Subject: [PATCH 054/322] fix: potential panic in shutdown listener Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- 
crates/curp/src/server/mod.rs | 4 +++- crates/curp/tests/it/common/curp_group.rs | 8 +++++++- crates/utils/src/task_manager/mod.rs | 13 ++++++------- crates/xline/src/server/lease_server.rs | 21 ++++++++++++--------- crates/xline/src/server/watch_server.rs | 8 ++++++-- crates/xline/src/server/xline_server.rs | 3 ++- 6 files changed, 36 insertions(+), 21 deletions(-) diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 8ed11971f..10e4b23f4 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -331,7 +331,9 @@ impl, RC: RoleChange> Rpc { use crate::rpc::{InnerProtocolServer, ProtocolServer}; - let n = task_manager.get_shutdown_listener(TaskName::TonicServer); + let n = task_manager + .get_shutdown_listener(TaskName::TonicServer) + .unwrap_or_else(|| unreachable!("cluster should never shutdown before start")); let server = Self::new( cluster_info, is_leader, diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index fbdab5951..8fe32ae18 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -373,6 +373,8 @@ impl CurpGroup { ) .await .expect("wait for group to shutdown timeout"); + // Sleep for some duration because the tasks may not exit immediately + tokio::time::sleep(Duration::from_secs(2)).await; assert!(self.is_finished(), "The group is not finished yet"); } @@ -381,7 +383,11 @@ impl CurpGroup { .flat_map(|node| { BOTTOM_TASKS .iter() - .map(|task| node.task_manager.get_shutdown_listener(task.to_owned())) + .map(|task| { + node.task_manager + .get_shutdown_listener(task.to_owned()) + .unwrap() + }) .collect::>() }) .collect::>(); diff --git a/crates/utils/src/task_manager/mod.rs b/crates/utils/src/task_manager/mod.rs index 8f177b8ee..834949969 100644 --- a/crates/utils/src/task_manager/mod.rs +++ b/crates/utils/src/task_manager/mod.rs @@ -121,18 +121,17 @@ impl TaskManager { } /// Get shutdown listener + /// + 
/// Returns `None` if the cluster has been shutdowned #[must_use] #[inline] - pub fn get_shutdown_listener(&self, name: TaskName) -> Listener { - let task = self - .tasks - .get(&name) - .unwrap_or_else(|| unreachable!("task {:?} should exist", name)); - Listener::new( + pub fn get_shutdown_listener(&self, name: TaskName) -> Option { + let task = self.tasks.get(&name)?; + Some(Listener::new( Arc::clone(&self.state), Arc::clone(&task.notifier), Arc::clone(&self.cluster_shutdown_tracker), - ) + )) } /// Spawn a task diff --git a/crates/xline/src/server/lease_server.rs b/crates/xline/src/server/lease_server.rs index d528c1c8d..1dca749f7 100644 --- a/crates/xline/src/server/lease_server.rs +++ b/crates/xline/src/server/lease_server.rs @@ -52,6 +52,10 @@ pub(crate) struct LeaseServer { task_manager: Arc, } +/// A lease keep alive stream +type KeepAliveStream = + Pin> + Send>>; + impl LeaseServer { /// New `LeaseServer` pub(crate) fn new( @@ -135,10 +139,11 @@ impl LeaseServer { fn leader_keep_alive( &self, mut request_stream: tonic::Streaming, - ) -> Pin> + Send>> { + ) -> Result { let shutdown_listener = self .task_manager - .get_shutdown_listener(TaskName::LeaseKeepAlive); + .get_shutdown_listener(TaskName::LeaseKeepAlive) + .ok_or(tonic::Status::cancelled("The cluster is shutting down"))?; let lease_storage = Arc::clone(&self.lease_storage); let stream = try_stream! 
{ loop { @@ -176,7 +181,7 @@ impl LeaseServer { }; } }; - Box::pin(stream) + Ok(Box::pin(stream)) } /// Handle keep alive at follower @@ -185,13 +190,11 @@ impl LeaseServer { &self, mut request_stream: tonic::Streaming, leader_addrs: &[String], - ) -> Result< - Pin> + Send>>, - tonic::Status, - > { + ) -> Result { let shutdown_listener = self .task_manager - .get_shutdown_listener(TaskName::LeaseKeepAlive); + .get_shutdown_listener(TaskName::LeaseKeepAlive) + .ok_or(tonic::Status::cancelled("The cluster is shutting down"))?; let endpoints = build_endpoints(leader_addrs, self.client_tls_config.as_ref())?; let channel = tonic::transport::Channel::balance_list(endpoints.into_iter()); let mut lease_client = LeaseClient::new(channel); @@ -302,7 +305,7 @@ impl Lease for LeaseServer { let request_stream = request.into_inner(); let stream = loop { if self.lease_storage.is_primary() { - break self.leader_keep_alive(request_stream); + break self.leader_keep_alive(request_stream)?; } let leader_id = self.client.fetch_leader_id(false).await?; // Given that a candidate server may become a leader when it won the election or diff --git a/crates/xline/src/server/watch_server.rs b/crates/xline/src/server/watch_server.rs index d7cb68f60..29f67cf74 100644 --- a/crates/xline/src/server/watch_server.rs +++ b/crates/xline/src/server/watch_server.rs @@ -481,7 +481,9 @@ mod test { .return_const(-1_i64); let watcher = Arc::new(mock_watcher); let next_id = Arc::new(WatchIdGenerator::new(1)); - let n = task_manager.get_shutdown_listener(TaskName::WatchTask); + let n = task_manager + .get_shutdown_listener(TaskName::WatchTask) + .unwrap(); let handle = tokio::spawn(WatchServer::task( next_id, Arc::clone(&watcher), @@ -733,7 +735,9 @@ mod test { .return_const(-1_i64); let watcher = Arc::new(mock_watcher); let next_id = Arc::new(WatchIdGenerator::new(1)); - let n = task_manager.get_shutdown_listener(TaskName::WatchTask); + let n = task_manager + .get_shutdown_listener(TaskName::WatchTask) + 
.unwrap(); let handle = tokio::spawn(WatchServer::task( next_id, Arc::clone(&watcher), diff --git a/crates/xline/src/server/xline_server.rs b/crates/xline/src/server/xline_server.rs index d5116d3ac..73a8a4ac6 100644 --- a/crates/xline/src/server/xline_server.rs +++ b/crates/xline/src/server/xline_server.rs @@ -347,7 +347,8 @@ impl XlineServer { ) -> Result>> { let n1 = self .task_manager - .get_shutdown_listener(TaskName::TonicServer); + .get_shutdown_listener(TaskName::TonicServer) + .unwrap_or_else(|| unreachable!("cluster should never shutdown before start")); let n2 = n1.clone(); let db = DB::open(&self.storage_config.engine)?; let key_pair = Self::read_key_pair(&self.auth_config).await?; From 5843ee5c9336f58038dc9b3c6db028e322eb3c1b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:35:32 +0800 Subject: [PATCH 055/322] fix: only return shutdown error on cluster shutdown Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node.rs | 11 ++++++---- crates/curp/src/server/raw_curp/mod.rs | 11 +++++++--- crates/utils/src/task_manager/mod.rs | 28 ++++++++++++++++++-------- 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/crates/curp/src/server/curp_node.rs b/crates/curp/src/server/curp_node.rs index 1b1b94cc9..4e1c5a552 100644 --- a/crates/curp/src/server/curp_node.rs +++ b/crates/curp/src/server/curp_node.rs @@ -160,7 +160,7 @@ impl, RC: RoleChange> CurpNode { resp_tx: Arc, bypassed: bool, ) -> Result<(), CurpError> { - if self.curp.is_shutdown() { + if self.curp.is_cluster_shutdown() { return Err(CurpError::shutting_down()); } self.curp.check_leader_transfer()?; @@ -206,7 +206,7 @@ impl, RC: RoleChange> CurpNode { /// Handle `Record` requests pub(super) fn record(&self, req: &RecordRequest) -> Result { - if self.curp.is_shutdown() { + if self.curp.is_cluster_shutdown() { return Err(CurpError::shutting_down()); } let id = req.propose_id(); @@ -218,7 +218,7 
@@ impl, RC: RoleChange> CurpNode { /// Handle `Record` requests pub(super) fn read_index(&self) -> Result { - if self.curp.is_shutdown() { + if self.curp.is_cluster_shutdown() { return Err(CurpError::shutting_down()); } Ok(ReadIndexResponse { @@ -383,9 +383,12 @@ impl, RC: RoleChange> CurpNode { // NOTE: The leader may shutdown itself in configuration change. // We must first check this situation. self.curp.check_leader_transfer()?; - if self.curp.is_shutdown() { + if self.curp.is_cluster_shutdown() { return Err(CurpError::shutting_down()); } + if self.curp.is_node_shutdown() { + return Err(CurpError::node_not_exist()); + } if !self.curp.is_leader() { let (leader_id, term, _) = self.curp.leader(); return Err(CurpError::redirect(leader_id, term)); diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index fd367400f..b6f529c12 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1374,9 +1374,14 @@ impl RawCurp { ) } - /// Check if the cluster is shutting down - pub(super) fn is_shutdown(&self) -> bool { - self.task_manager.is_shutdown() + /// Check if the current node is shutting down + pub(super) fn is_node_shutdown(&self) -> bool { + self.task_manager.is_node_shutdown() + } + + /// Check if the current node is shutting down + pub(super) fn is_cluster_shutdown(&self) -> bool { + self.task_manager.is_cluster_shutdown() } /// Get a cloned task manager diff --git a/crates/utils/src/task_manager/mod.rs b/crates/utils/src/task_manager/mod.rs index 834949969..587613cb7 100644 --- a/crates/utils/src/task_manager/mod.rs +++ b/crates/utils/src/task_manager/mod.rs @@ -120,6 +120,20 @@ impl TaskManager { self.state.load(Ordering::Acquire) != 0 } + /// Check if the cluster is shutdown + #[must_use] + #[inline] + pub fn is_node_shutdown(&self) -> bool { + self.state.load(Ordering::Acquire) == 1 + } + + /// Check if the cluster is shutdown + #[must_use] + #[inline] + pub fn 
is_cluster_shutdown(&self) -> bool { + self.state.load(Ordering::Acquire) == 2 + } + /// Get shutdown listener /// /// Returns `None` if the cluster has been shutdowned @@ -167,9 +181,8 @@ impl TaskManager { } /// Inner shutdown task - async fn inner_shutdown(tasks: Arc>, state: Arc) { + async fn inner_shutdown(tasks: Arc>) { let mut queue = Self::root_tasks_queue(&tasks); - state.store(1, Ordering::Release); while let Some(v) = queue.pop_front() { let Some((_name, mut task)) = tasks.remove(&v) else { continue; @@ -205,8 +218,8 @@ impl TaskManager { #[inline] pub async fn shutdown(&self, wait: bool) { let tasks = Arc::clone(&self.tasks); - let state = Arc::clone(&self.state); - let h = tokio::spawn(Self::inner_shutdown(tasks, state)); + self.state.store(1, Ordering::Release); + let h = tokio::spawn(Self::inner_shutdown(tasks)); if wait { h.await .unwrap_or_else(|e| unreachable!("shutdown task should not panic: {e}")); @@ -217,11 +230,10 @@ impl TaskManager { #[inline] pub fn cluster_shutdown(&self) { let tasks = Arc::clone(&self.tasks); - let state = Arc::clone(&self.state); let tracker = Arc::clone(&self.cluster_shutdown_tracker); + self.state.store(2, Ordering::Release); let _ig = tokio::spawn(async move { info!("cluster shutdown start"); - state.store(2, Ordering::Release); _ = tasks .get(&TaskName::SyncFollower) .map(|n| n.notifier.notify_waiters()); @@ -232,7 +244,7 @@ impl TaskManager { tracker.notify.notified().await; } info!("cluster shutdown check passed, start shutdown"); - Self::inner_shutdown(tasks, state).await; + Self::inner_shutdown(tasks).await; }); } @@ -430,7 +442,7 @@ mod test { } drop(record_tx); tokio::time::sleep(Duration::from_secs(1)).await; - TaskManager::inner_shutdown(Arc::clone(&tm.tasks), Arc::clone(&tm.state)).await; + TaskManager::inner_shutdown(Arc::clone(&tm.tasks)).await; let mut shutdown_order = vec![]; while let Some(name) = record_rx.recv().await { shutdown_order.push(name); From dadeab55700902bcdbd941986d06a20c3aee75e5 Mon Sep 
17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 05:50:52 +0000 Subject: [PATCH 056/322] chore(deps): bump crate-ci/typos from 1.23.7 to 1.24.1 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.23.7 to 1.24.1. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.23.7...v1.24.1) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index ebea9fcad..297603834 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -128,7 +128,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Check Spelling - uses: crate-ci/typos@v1.23.7 + uses: crate-ci/typos@v1.24.1 build: name: Build From 416fb732048b8b0f891d56b386933d80bb8267fd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 05:08:37 +0000 Subject: [PATCH 057/322] chore(deps): bump async-trait from 0.1.80 to 0.1.81 Bumps [async-trait](https://github.com/dtolnay/async-trait) from 0.1.80 to 0.1.81. - [Release notes](https://github.com/dtolnay/async-trait/releases) - [Commits](https://github.com/dtolnay/async-trait/compare/0.1.80...0.1.81) --- updated-dependencies: - dependency-name: async-trait dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- crates/curp-external-api/Cargo.toml | 2 +- crates/curp-test-utils/Cargo.toml | 2 +- crates/curp/Cargo.toml | 2 +- crates/engine/Cargo.toml | 2 +- crates/simulation/Cargo.toml | 2 +- crates/utils/Cargo.toml | 2 +- crates/xline-client/Cargo.toml | 2 +- crates/xline/Cargo.toml | 2 +- crates/xlineapi/Cargo.toml | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fe28375a8..44a7d41b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -217,9 +217,9 @@ checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" [[package]] name = "async-trait" -version = "0.1.80" +version = "0.1.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", diff --git a/crates/curp-external-api/Cargo.toml b/crates/curp-external-api/Cargo.toml index c17124b52..fe288e0d8 100644 --- a/crates/curp-external-api/Cargo.toml +++ b/crates/curp-external-api/Cargo.toml @@ -11,7 +11,7 @@ categories = ["API"] keywords = ["API", "Curp"] [dependencies] -async-trait = "0.1.80" +async-trait = "0.1.81" engine = { path = "../engine" } mockall = "0.12.1" prost = "0.13" diff --git a/crates/curp-test-utils/Cargo.toml b/crates/curp-test-utils/Cargo.toml index cfae0b69f..622c25696 100644 --- a/crates/curp-test-utils/Cargo.toml +++ b/crates/curp-test-utils/Cargo.toml @@ -11,7 +11,7 @@ license = "Apache-2.0" readme = "README.md" [dependencies] -async-trait = "0.1.80" +async-trait = "0.1.81" bincode = "1.3.3" curp-external-api = { path = "../curp-external-api" } engine = { path = "../engine" } diff --git a/crates/curp/Cargo.toml b/crates/curp/Cargo.toml index 7bde0dcc8..bc2ac71f6 100644 --- a/crates/curp/Cargo.toml +++ b/crates/curp/Cargo.toml @@ -13,7 +13,7 @@ version = "0.1.0" [dependencies] 
async-stream = "0.3.4" -async-trait = "0.1.80" +async-trait = "0.1.81" bincode = "1.3.3" bytes = "1.4.0" clippy-utilities = "0.2.0" diff --git a/crates/engine/Cargo.toml b/crates/engine/Cargo.toml index f0f80a9e2..9bfaddd37 100644 --- a/crates/engine/Cargo.toml +++ b/crates/engine/Cargo.toml @@ -12,7 +12,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -async-trait = "0.1.80" +async-trait = "0.1.81" bincode = "1.3.3" bytes = "1.4.0" clippy-utilities = "0.2.0" diff --git a/crates/simulation/Cargo.toml b/crates/simulation/Cargo.toml index e1afc93e7..bbc988f05 100644 --- a/crates/simulation/Cargo.toml +++ b/crates/simulation/Cargo.toml @@ -11,7 +11,7 @@ categories = ["Test"] keywords = ["Test", "Deterministic Simulation"] [dependencies] -async-trait = "0.1.80" +async-trait = "0.1.81" bincode = "1.3.3" curp = { path = "../curp" } curp-test-utils = { path = "../curp-test-utils" } diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index 98837284e..d4f085f7b 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -18,7 +18,7 @@ tokio = ["dep:async-trait"] parking_lot = ["dep:parking_lot"] [dependencies] -async-trait = { version = "0.1.80", optional = true } +async-trait = { version = "0.1.81", optional = true } clippy-utilities = "0.2.0" dashmap = "6.0.1" derive_builder = "0.20.0" diff --git a/crates/xline-client/Cargo.toml b/crates/xline-client/Cargo.toml index 556c89deb..2554b6aac 100644 --- a/crates/xline-client/Cargo.toml +++ b/crates/xline-client/Cargo.toml @@ -13,7 +13,7 @@ keywords = ["Client", "Xline", "RPC"] [dependencies] anyhow = "1.0.83" async-dropper = { version = "0.3.1", features = ["tokio", "simple"] } -async-trait = "0.1.80" +async-trait = "0.1.81" clippy-utilities = "0.2.0" curp = { path = "../curp" } futures = "0.3.25" diff --git a/crates/xline/Cargo.toml b/crates/xline/Cargo.toml index d79fb39c1..92e5c3655 100644 --- a/crates/xline/Cargo.toml 
+++ b/crates/xline/Cargo.toml @@ -14,7 +14,7 @@ categories = ["KV"] [dependencies] anyhow = "1.0.83" async-stream = "0.3.5" -async-trait = "0.1.80" +async-trait = "0.1.81" axum = "0.7.0" bytes = "1.4.0" clap = { version = "4", features = ["derive"] } diff --git a/crates/xlineapi/Cargo.toml b/crates/xlineapi/Cargo.toml index 1e984f799..0574402ab 100644 --- a/crates/xlineapi/Cargo.toml +++ b/crates/xlineapi/Cargo.toml @@ -11,7 +11,7 @@ categories = ["RPC"] keywords = ["RPC", "Interfaces"] [dependencies] -async-trait = "0.1.80" +async-trait = "0.1.81" curp = { path = "../curp" } curp-external-api = { path = "../curp-external-api" } itertools = "0.13" From 5defb09c4614a81f3d4a8495633c73208c962ecd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 05:08:27 +0000 Subject: [PATCH 058/322] chore(deps): bump bytes from 1.6.0 to 1.7.1 Bumps [bytes](https://github.com/tokio-rs/bytes) from 1.6.0 to 1.7.1. - [Release notes](https://github.com/tokio-rs/bytes/releases) - [Changelog](https://github.com/tokio-rs/bytes/blob/master/CHANGELOG.md) - [Commits](https://github.com/tokio-rs/bytes/compare/v1.6.0...v1.7.1) --- updated-dependencies: - dependency-name: bytes dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- crates/curp/Cargo.toml | 2 +- crates/engine/Cargo.toml | 2 +- crates/xline/Cargo.toml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 44a7d41b0..2d3f9988d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -421,9 +421,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytes" -version = "1.6.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] name = "bzip2-sys" diff --git a/crates/curp/Cargo.toml b/crates/curp/Cargo.toml index bc2ac71f6..163c9ee19 100644 --- a/crates/curp/Cargo.toml +++ b/crates/curp/Cargo.toml @@ -15,7 +15,7 @@ version = "0.1.0" async-stream = "0.3.4" async-trait = "0.1.81" bincode = "1.3.3" -bytes = "1.4.0" +bytes = "1.7.1" clippy-utilities = "0.2.0" curp-external-api = { path = "../curp-external-api" } curp-test-utils = { path = "../curp-test-utils" } diff --git a/crates/engine/Cargo.toml b/crates/engine/Cargo.toml index 9bfaddd37..dee74b692 100644 --- a/crates/engine/Cargo.toml +++ b/crates/engine/Cargo.toml @@ -14,7 +14,7 @@ edition = "2021" [dependencies] async-trait = "0.1.81" bincode = "1.3.3" -bytes = "1.4.0" +bytes = "1.7.1" clippy-utilities = "0.2.0" opentelemetry = { version = "0.24.0", features = ["metrics"] } parking_lot = "0.12.3" diff --git a/crates/xline/Cargo.toml b/crates/xline/Cargo.toml index 92e5c3655..a47d6c8eb 100644 --- a/crates/xline/Cargo.toml +++ b/crates/xline/Cargo.toml @@ -16,7 +16,7 @@ anyhow = "1.0.83" async-stream = "0.3.5" async-trait = "0.1.81" axum = "0.7.0" -bytes = "1.4.0" +bytes = "1.7.1" clap = { version = "4", features = ["derive"] } clippy-utilities = "0.2.0" crc32fast = "1.4.2" From 5b237ff72d098cb5c2f82276a7f250445e5e6cab Mon Sep 17 00:00:00 2001 From: 
"dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 05:08:08 +0000 Subject: [PATCH 059/322] chore(deps): bump strum from 0.26.2 to 0.26.3 Bumps [strum](https://github.com/Peternator7/strum) from 0.26.2 to 0.26.3. - [Release notes](https://github.com/Peternator7/strum/releases) - [Changelog](https://github.com/Peternator7/strum/blob/master/CHANGELOG.md) - [Commits](https://github.com/Peternator7/strum/compare/v0.26.2...v0.26.3) --- updated-dependencies: - dependency-name: strum dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2d3f9988d..41a83869c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2753,9 +2753,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "strum" -version = "0.26.2" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" [[package]] name = "strum_macros" From b6104b64e163f4adf88c49e2f1a9ea81d0dd2175 Mon Sep 17 00:00:00 2001 From: feathercyc Date: Tue, 27 Aug 2024 01:52:10 +0000 Subject: [PATCH 060/322] chore: add version number to eliminate RA warnings Signed-off-by: feathercyc --- crates/utils/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index d4f085f7b..f5c150b4b 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -25,7 +25,7 @@ derive_builder = "0.20.0" event-listener = "5.3.1" futures = "0.3.30" getset = "0.1" -interval_map = { package = "rb-interval-map" } +interval_map = { version = "0.1", package = "rb-interval-map" } opentelemetry = { version = "0.24.0", features = ["trace"] } opentelemetry_sdk = { 
version = "0.24.1", features = ["trace"] } parking_lot = { version = "0.12.3", optional = true } From 6364f1853572e8cb0866079d0bab2233b58363c7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 05:53:34 +0000 Subject: [PATCH 061/322] chore(deps): bump log from 0.4.21 to 0.4.22 Bumps [log](https://github.com/rust-lang/log) from 0.4.21 to 0.4.22. - [Release notes](https://github.com/rust-lang/log/releases) - [Changelog](https://github.com/rust-lang/log/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-lang/log/compare/0.4.21...0.4.22) --- updated-dependencies: - dependency-name: log dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- crates/xline/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 41a83869c..32d4726e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1550,9 +1550,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "lz4-sys" diff --git a/crates/xline/Cargo.toml b/crates/xline/Cargo.toml index a47d6c8eb..a5eecaf00 100644 --- a/crates/xline/Cargo.toml +++ b/crates/xline/Cargo.toml @@ -31,7 +31,7 @@ futures = "0.3.25" hyper = "1.0.0" itertools = "0.13" jsonwebtoken = "9.3.0" -log = "0.4.21" +log = "0.4.22" merged_range = "0.1.0" nix = "0.28.0" opentelemetry = { version = "0.24.0", features = ["metrics"] } From bffc0cf8e84d73c33169ed99394da719cefd6459 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 05:53:24 +0000 Subject: [PATCH 062/322] chore(deps): bump quote from 1.0.36 to 1.0.37 Bumps 
[quote](https://github.com/dtolnay/quote) from 1.0.36 to 1.0.37. - [Release notes](https://github.com/dtolnay/quote/releases) - [Commits](https://github.com/dtolnay/quote/compare/1.0.36...1.0.37) --- updated-dependencies: - dependency-name: quote dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 32d4726e3..7e59c4c6b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2284,9 +2284,9 @@ checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] From 576eae884ba40144603b4af67e9baa9fdb280f03 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 05:53:13 +0000 Subject: [PATCH 063/322] chore(deps): bump predicates from 3.1.0 to 3.1.2 Bumps [predicates](https://github.com/assert-rs/predicates-rs) from 3.1.0 to 3.1.2. - [Changelog](https://github.com/assert-rs/predicates-rs/blob/master/CHANGELOG.md) - [Commits](https://github.com/assert-rs/predicates-rs/compare/v3.1.0...v3.1.2) --- updated-dependencies: - dependency-name: predicates dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7e59c4c6b..1b77ed2c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2129,9 +2129,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "predicates" -version = "3.1.0" +version = "3.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b87bfd4605926cdfefc1c3b5f8fe560e3feca9d5552cf68c466d3d8236c7e8" +checksum = "7e9086cc7640c29a356d1a29fd134380bee9d8f79a17410aa76e7ad295f42c97" dependencies = [ "anstyle", "difflib", From 0a04784d07c4f1d91142a2b36d139791d671485c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 05:49:11 +0000 Subject: [PATCH 064/322] chore(deps): bump crate-ci/typos from 1.24.1 to 1.24.5 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.24.1 to 1.24.5. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.24.1...v1.24.5) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 297603834..00ff48bed 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -128,7 +128,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Check Spelling - uses: crate-ci/typos@v1.24.1 + uses: crate-ci/typos@v1.24.5 build: name: Build From c7a6a6ac4e7088e27159ec0375f2e8fe4d3958e0 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 29 Aug 2024 23:18:23 +0800 Subject: [PATCH 065/322] fix: add retry for client initial cluster node discovery Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 36 ++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 475c5c500..8325eeb46 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -32,7 +32,7 @@ use parking_lot::RwLock; use tokio::task::JoinHandle; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; -use tracing::debug; +use tracing::{debug, warn}; #[cfg(madsim)] use utils::ClientTlsConfig; use utils::{build_endpoint, config::ClientConfig}; @@ -298,6 +298,27 @@ impl ClientBuilder { /// Return `tonic::Status` for connection failure or some server errors. 
#[inline] pub async fn discover_from(mut self, addrs: Vec) -> Result { + /// Sleep duration in secs when the cluster is unavailable + const DISCOVER_SLEEP_DURATION: u64 = 1; + loop { + match self.try_discover_from(&addrs).await { + Ok(()) => return Ok(self), + Err(e) if matches!(e.code(), tonic::Code::Unavailable) => { + warn!("cluster is starting, sleep for {DISCOVER_SLEEP_DURATION} secs"); + tokio::time::sleep(Duration::from_secs(DISCOVER_SLEEP_DURATION)).await; + } + Err(e) => return Err(e), + } + } + } + + /// Discover the initial states from some endpoints + /// + /// # Errors + /// + /// Return `tonic::Status` for connection failure or some server errors. + #[inline] + pub async fn try_discover_from(&mut self, addrs: &[String]) -> Result<(), tonic::Status> { let propose_timeout = *self.config.propose_timeout(); let mut futs: FuturesUnordered<_> = addrs .iter() @@ -330,9 +351,9 @@ impl ClientBuilder { self.all_members = if self.is_raw_curp { Some(r.into_peer_urls()) } else { - Some(r.into_client_urls()) + Some(Self::ensure_no_empty_address(r.into_client_urls())?) }; - return Ok(self); + return Ok(()); } Err(e) => err = e, } @@ -340,6 +361,15 @@ impl ClientBuilder { Err(err) } + /// Ensures that no server has an empty list of addresses. + fn ensure_no_empty_address( + urls: HashMap>, + ) -> Result>, tonic::Status> { + (!urls.values().any(Vec::is_empty)) + .then_some(urls) + .ok_or(tonic::Status::unavailable("cluster not published")) + } + /// Init state builder fn init_state_builder(&self) -> StateBuilder { let mut builder = StateBuilder::new( From 6e207b2c28d08b598f74f3cd2a07f0a56e985308 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Sep 2024 05:32:05 +0000 Subject: [PATCH 066/322] chore(deps): bump cc from 1.0.97 to 1.1.19 Bumps [cc](https://github.com/rust-lang/cc-rs) from 1.0.97 to 1.1.19. 
- [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/1.0.97...cc-v1.1.19) --- updated-dependencies: - dependency-name: cc dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1b77ed2c0..afce2c2db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -438,13 +438,13 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.97" +version = "1.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" +checksum = "2d74707dde2ba56f86ae90effb3b43ddd369504387e718014de010cec7959800" dependencies = [ "jobserver", "libc", - "once_cell", + "shlex", ] [[package]] From 689663dbd662c01bfaee69a33a68949e2d96833f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 05:50:00 +0000 Subject: [PATCH 067/322] chore(deps): bump dashmap from 6.0.1 to 6.1.0 Bumps [dashmap](https://github.com/xacrimon/dashmap) from 6.0.1 to 6.1.0. - [Release notes](https://github.com/xacrimon/dashmap/releases) - [Commits](https://github.com/xacrimon/dashmap/compare/v6.0.1...v6.1.0) --- updated-dependencies: - dependency-name: dashmap dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 4 ++-- crates/curp/Cargo.toml | 2 +- crates/utils/Cargo.toml | 2 +- crates/xline/Cargo.toml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index afce2c2db..aa2e95cb9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -794,9 +794,9 @@ dependencies = [ [[package]] name = "dashmap" -version = "6.0.1" +version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "804c8821570c3f8b70230c2ba75ffa5c0f9a4189b9a432b6656c536712acae28" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ "cfg-if", "crossbeam-utils", diff --git a/crates/curp/Cargo.toml b/crates/curp/Cargo.toml index 163c9ee19..0294aa389 100644 --- a/crates/curp/Cargo.toml +++ b/crates/curp/Cargo.toml @@ -19,7 +19,7 @@ bytes = "1.7.1" clippy-utilities = "0.2.0" curp-external-api = { path = "../curp-external-api" } curp-test-utils = { path = "../curp-test-utils" } -dashmap = "6.0.1" +dashmap = "6.1.0" derive_builder = "0.20.0" engine = { path = "../engine" } event-listener = "5.3.1" diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index f5c150b4b..809bc8a27 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -20,7 +20,7 @@ parking_lot = ["dep:parking_lot"] [dependencies] async-trait = { version = "0.1.81", optional = true } clippy-utilities = "0.2.0" -dashmap = "6.0.1" +dashmap = "6.1.0" derive_builder = "0.20.0" event-listener = "5.3.1" futures = "0.3.30" diff --git a/crates/xline/Cargo.toml b/crates/xline/Cargo.toml index a5eecaf00..cf94dbcf4 100644 --- a/crates/xline/Cargo.toml +++ b/crates/xline/Cargo.toml @@ -23,7 +23,7 @@ crc32fast = "1.4.2" crossbeam-skiplist = "0.1.1" curp = { path = "../curp", version = "0.1.0", features = ["client-metrics"] } curp-external-api = { path = "../curp-external-api" } -dashmap = "6.0.1" +dashmap = "6.1.0" engine = { path = "../engine" } event-listener = "5.3.1" flume = "0.11.0" From 
8529feaf5adb01497ce9be41dac2f3b727d9e7ef Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:53:23 +0800 Subject: [PATCH 068/322] fix: enable `off` log level in `parse_log_level` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/utils/src/parser.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/utils/src/parser.rs b/crates/utils/src/parser.rs index 15c0d7182..75289a5f7 100644 --- a/crates/utils/src/parser.rs +++ b/crates/utils/src/parser.rs @@ -217,6 +217,7 @@ pub fn parse_log_level(s: &str) -> Result { "info" => Ok(LevelConfig::INFO), "warn" => Ok(LevelConfig::WARN), "error" => Ok(LevelConfig::ERROR), + "off" => Ok(LevelConfig::OFF), _ => Err(ConfigParseError::InvalidValue(format!( "the log level should be one of 'trace', 'debug', 'info', 'warn' or 'error' ({s})" ))), From 055ed49f5148570f6793a6fb3fe8bdc3a4e1bb3a Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 11 Sep 2024 16:24:12 +0800 Subject: [PATCH 069/322] refactor: use default log level env if it exists Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/src/utils/trace.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/xline/src/utils/trace.rs b/crates/xline/src/utils/trace.rs index cbfff13d3..9fad02fa9 100644 --- a/crates/xline/src/utils/trace.rs +++ b/crates/xline/src/utils/trace.rs @@ -1,4 +1,4 @@ -use anyhow::{Ok, Result}; +use anyhow::Result; use opentelemetry::global; use opentelemetry::trace::TracerProvider; use opentelemetry_contrib::trace::exporter::jaeger_json::JaegerJsonExporter; @@ -6,7 +6,9 @@ use opentelemetry_sdk::runtime::Tokio; use tracing::warn; use tracing_appender::non_blocking::WorkerGuard; use tracing_subscriber::layer::SubscriberExt; -use tracing_subscriber::{fmt::format, util::SubscriberInitExt, Layer}; +use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::EnvFilter; 
+use tracing_subscriber::{fmt::format, Layer}; use utils::config::{file_appender, LogConfig, RotationConfig, TraceConfig}; /// Return a Box trait from the config @@ -66,16 +68,19 @@ pub fn init_subscriber( .with_filter(tracing_subscriber::EnvFilter::from_default_env()); let writer = generate_writer(name, log_config); let (non_blocking, guard) = tracing_appender::non_blocking(writer); + let filter = EnvFilter::try_from_default_env() + .unwrap_or_else(|_| EnvFilter::default().add_directive((*log_config.level()).into())); let log_layer = tracing_subscriber::fmt::layer() .event_format(format().compact()) .with_writer(non_blocking) .with_ansi(false) - .with_filter(*log_config.level()); + .with_filter(filter); + tracing_subscriber::registry() .with(jaeger_fmt_layer) .with(jaeger_online_layer) .with(jaeger_offline_layer) .with(log_layer) .try_init()?; - Ok(Some(guard)) + anyhow::Ok(Some(guard)) } From eb22b729b403cd4b49dfe1d48171fa2aa6420bfd Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 11 Sep 2024 16:31:05 +0800 Subject: [PATCH 070/322] chore: update bug_report.yaml Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .github/ISSUE_TEMPLATE/bug_report.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml index 1e96ce684..e040e0de8 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -32,7 +32,8 @@ body: - 0.4.1 - 0.5.0 - 0.6.0 - - 0.6.1 (Default) + - 0.6.1 + - 0.7.0 (Default) validations: required: true - type: textarea From 74601d1d6dd39673f40110e69c49750bed4b98ce Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 23 Sep 2024 10:52:50 +0800 Subject: [PATCH 071/322] chore: remove trailing space in bug_report.yaml Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .github/ISSUE_TEMPLATE/bug_report.yaml | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml index e040e0de8..2b4c7532b 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -32,7 +32,7 @@ body: - 0.4.1 - 0.5.0 - 0.6.0 - - 0.6.1 + - 0.6.1 - 0.7.0 (Default) validations: required: true From 81279b4389c02da295d76cae9a9f45cbba135969 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Sep 2024 05:07:29 +0000 Subject: [PATCH 072/322] chore(deps): bump prost from 0.13.1 to 0.13.3 Bumps [prost](https://github.com/tokio-rs/prost) from 0.13.1 to 0.13.3. - [Release notes](https://github.com/tokio-rs/prost/releases) - [Changelog](https://github.com/tokio-rs/prost/blob/master/CHANGELOG.md) - [Commits](https://github.com/tokio-rs/prost/compare/v0.13.1...v0.13.3) --- updated-dependencies: - dependency-name: prost dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 8 ++++---- crates/xline/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aa2e95cb9..4e1f3d3ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2225,9 +2225,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.1" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13db3d3fde688c61e2446b4d843bc27a7e8af269a69440c0308021dc92333cc" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" dependencies = [ "bytes", "prost-derive", @@ -2256,9 +2256,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.1" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18bec9b0adc4eba778b33684b7ba3e7137789434769ee3ce3930463ef904cfca" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" dependencies = [ "anyhow", "itertools 0.13.0", diff --git a/crates/xline/Cargo.toml b/crates/xline/Cargo.toml index cf94dbcf4..83d15d899 100644 --- a/crates/xline/Cargo.toml +++ b/crates/xline/Cargo.toml @@ -51,7 +51,7 @@ parking_lot = "0.12.3" pbkdf2 = { version = "0.12.2", features = ["simple"] } priority-queue = "2.0.2" prometheus = "0.13.4" -prost = "0.13.0" +prost = "0.13.3" real_tokio = { version = "1", package = "tokio" } serde = { version = "1.0.204", features = ["derive"] } sha2 = "0.10.6" From 67ccaf82024a863651a89e1853f285b7ae2dec47 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 05:50:09 +0000 Subject: [PATCH 073/322] chore(deps): bump tempfile from 3.10.1 to 3.12.0 Bumps [tempfile](https://github.com/Stebalien/tempfile) from 3.10.1 to 3.12.0. 
- [Changelog](https://github.com/Stebalien/tempfile/blob/master/CHANGELOG.md) - [Commits](https://github.com/Stebalien/tempfile/commits) --- updated-dependencies: - dependency-name: tempfile dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Cargo.lock | 76 +++++++++++++++++++++----------------- crates/xlineutl/Cargo.toml | 2 +- 2 files changed, 44 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4e1f3d3ee..3c13d19bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -477,7 +477,7 @@ dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -1502,7 +1502,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.52.5", + "windows-targets 0.48.5", ] [[package]] @@ -2019,7 +2019,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -2812,14 +2812,15 @@ checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" [[package]] name = "tempfile" -version = "3.10.1" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" dependencies = [ "cfg-if", "fastrand", + "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3488,7 +3489,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", ] [[package]] @@ -3506,7 +3507,16 @@ version = "0.52.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.5", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -3526,18 +3536,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.5", - "windows_aarch64_msvc 0.52.5", - "windows_i686_gnu 0.52.5", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.5", - "windows_x86_64_gnu 0.52.5", - "windows_x86_64_gnullvm 0.52.5", - "windows_x86_64_msvc 0.52.5", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -3548,9 +3558,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -3560,9 +3570,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -3572,15 +3582,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -3590,9 +3600,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -3602,9 +3612,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -3614,9 +3624,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] 
name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -3626,9 +3636,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" diff --git a/crates/xlineutl/Cargo.toml b/crates/xlineutl/Cargo.toml index 5f11475d1..d1df8e294 100644 --- a/crates/xlineutl/Cargo.toml +++ b/crates/xlineutl/Cargo.toml @@ -18,7 +18,7 @@ crc32fast = "1.4.2" engine = { path = "../engine" } serde = { version = "1.0.204", features = ["derive"] } serde_json = "1.0.125" -tempfile = "3.10.1" +tempfile = "3.12.0" tokio = "1" utils = { path = "../utils" } workspace-hack = { version = "0.1", path = "../../workspace-hack" } From 4c290bca3fdb99d390d077b9252a9a77e9ac57be Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sat, 31 Aug 2024 20:21:09 +0800 Subject: [PATCH 074/322] refactor: curp client propose Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 2 +- .../src/client/{unary.rs => unary/mod.rs} | 122 +------ crates/curp/src/client/unary/propose_impl.rs | 323 ++++++++++++++++++ crates/curp/src/response.rs | 84 +---- 4 files changed, 337 insertions(+), 194 deletions(-) rename crates/curp/src/client/{unary.rs => unary/mod.rs} (74%) create mode 100644 crates/curp/src/client/unary/propose_impl.rs diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 
8325eeb46..822da7802 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -54,7 +54,7 @@ use crate::{ /// The response of propose command, deserialized from [`crate::rpc::ProposeResponse`] or /// [`crate::rpc::WaitSyncedResponse`]. #[allow(type_alias_bounds)] // that's not bad -type ProposeResponse = Result<(C::ER, Option), C::Error>; +pub(crate) type ProposeResponse = Result<(C::ER, Option), C::Error>; /// `ClientApi`, a higher wrapper for `ConnectApi`, providing some methods for communicating to /// the whole curp cluster. Automatically discovery curp server to update it's quorum. diff --git a/crates/curp/src/client/unary.rs b/crates/curp/src/client/unary/mod.rs similarity index 74% rename from crates/curp/src/client/unary.rs rename to crates/curp/src/client/unary/mod.rs index 2acf6658a..8219ec04b 100644 --- a/crates/curp/src/client/unary.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -1,3 +1,6 @@ +/// Client propose implementation +mod propose_impl; + use std::{ cmp::Ordering, marker::PhantomData, @@ -7,9 +10,9 @@ use std::{ use async_trait::async_trait; use curp_external_api::cmd::Command; -use futures::{future, stream::FuturesUnordered, Future, Stream, StreamExt}; +use futures::{Future, StreamExt}; use parking_lot::RwLock; -use tonic::{Response, Status}; +use tonic::Response; use tracing::{debug, warn}; use super::{ @@ -19,14 +22,11 @@ use super::{ use crate::{ members::ServerId, quorum, - response::ResponseReceiver, rpc::{ connect::ConnectApi, ConfChange, CurpError, FetchClusterRequest, FetchClusterResponse, - FetchReadStateRequest, Member, MoveLeaderRequest, OpResponse, ProposeConfChangeRequest, - ProposeId, ProposeRequest, PublishRequest, ReadIndexResponse, ReadState, RecordRequest, - RecordResponse, ShutdownRequest, + FetchReadStateRequest, Member, MoveLeaderRequest, ProposeConfChangeRequest, ProposeId, + PublishRequest, ReadState, ShutdownRequest, }, - super_quorum, tracker::Tracker, }; @@ -118,72 +118,6 @@ impl Unary { } } 
-impl Unary { - /// Propose for read only commands - /// - /// For read-only commands, we only need to send propose to leader - async fn propose_read_only( - propose_fut: PF, - use_fast_path: bool, - read_index_futs: FuturesUnordered, - term: u64, - quorum: usize, - ) -> Result, CurpError> - where - PF: Future< - Output = Result< - Response> + Send>>, - CurpError, - >, - >, - RIF: Future, CurpError>>, - { - let term_count_fut = read_index_futs - .filter_map(|res| future::ready(res.ok())) - .filter(|resp| future::ready(resp.get_ref().term == term)) - .take(quorum.wrapping_sub(1)) - .count(); - let (propose_res, num_valid) = tokio::join!(propose_fut, term_count_fut); - if num_valid < quorum.wrapping_sub(1) { - return Err(CurpError::WrongClusterVersion(())); - } - let resp_stream = propose_res?.into_inner(); - let mut response_rx = ResponseReceiver::new(resp_stream); - response_rx.recv::(!use_fast_path).await - } - - /// Propose for mutative commands - async fn propose_mutative( - propose_fut: PF, - record_futs: FuturesUnordered, - use_fast_path: bool, - superquorum: usize, - ) -> Result, CurpError> - where - PF: Future< - Output = Result< - Response> + Send>>, - CurpError, - >, - >, - RF: Future, CurpError>>, - { - let record_futs_filtered = record_futs - .filter_map(|res| future::ready(res.ok())) - .filter(|resp| future::ready(!resp.get_ref().conflict)) - .take(superquorum.wrapping_sub(1)) - .collect::>(); - let (propose_res, record_resps) = tokio::join!(propose_fut, record_futs_filtered); - - let resp_stream = propose_res?.into_inner(); - let mut response_rx = ResponseReceiver::new(resp_stream); - let fast_path_failed = record_resps.len() < superquorum.wrapping_sub(1); - response_rx - .recv::(fast_path_failed || !use_fast_path) - .await - } -} - #[async_trait] impl ClientApi for Unary { /// The error is generated from server @@ -390,46 +324,12 @@ impl RepeatableClientApi for Unary { token: Option<&String>, use_fast_path: bool, ) -> Result, Self::Error> { - let 
cmd_arc = Arc::new(cmd); - let term = self.state.term().await; - let propose_req = ProposeRequest::new::( - propose_id, - cmd_arc.as_ref(), - self.state.cluster_version().await, - term, - !use_fast_path, - self.tracker.read().first_incomplete(), - ); - let record_req = RecordRequest::new::(propose_id, cmd_arc.as_ref()); - let connects_len = self.state.connects_len().await; - let quorum = quorum(connects_len); - let superquorum = super_quorum(connects_len); - let leader_id = self.leader_id().await?; - let timeout = self.config.propose_timeout; - - let propose_fut = self.state.map_server(leader_id, |conn| async move { - conn.propose_stream(propose_req, token.cloned(), timeout) - .await - }); - let record_futs = self - .state - .for_each_follower(leader_id, |conn| { - let record_req_c = record_req.clone(); - async move { conn.record(record_req_c, timeout).await } - }) - .await; - let read_index_futs = self - .state - .for_each_follower( - leader_id, - |conn| async move { conn.read_index(timeout).await }, - ) - .await; - if cmd.is_read_only() { - Self::propose_read_only(propose_fut, use_fast_path, read_index_futs, term, quorum).await + self.propose_read_only(cmd, propose_id, token, use_fast_path) + .await } else { - Self::propose_mutative(propose_fut, record_futs, use_fast_path, superquorum).await + self.propose_mutative(cmd, propose_id, token, use_fast_path) + .await } } diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs new file mode 100644 index 000000000..d70033d9d --- /dev/null +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -0,0 +1,323 @@ +use std::pin::Pin; + +use curp_external_api::cmd::Command; +use futures::{future, stream, FutureExt, Stream, StreamExt}; + +use crate::{ + client::ProposeResponse, + members::ServerId, + quorum, + rpc::{CurpError, OpResponse, ProposeId, ProposeRequest, RecordRequest, ResponseOp}, + super_quorum, +}; + +use super::Unary; + +/// A stream of propose events +type 
EventStream<'a, C> = Box, CurpError>> + Send + 'a>; + +/// An event returned by the cluster during propose +enum ProposeEvent { + /// Speculative execution result + SpecExec { + /// conflict returned by the leader + conflict_l: bool, + /// Speculative execution result + er: Result, + }, + /// After sync result + AfterSync { + /// After sync result + asr: Result, + }, + /// Record result + Record { + /// conflict returned by the follower + conflict: bool, + }, +} + +impl Unary { + /// Propose for mutative commands + pub(super) async fn propose_mutative( + &self, + cmd: &C, + propose_id: ProposeId, + token: Option<&String>, + use_fast_path: bool, + ) -> Result, CurpError> { + let stream = self + .send_propose_mutative(cmd, propose_id, use_fast_path, token) + .await?; + let mut stream = Box::into_pin(stream); + let first_two_events = ( + Self::next_event(&mut stream).await?, + Self::next_event(&mut stream).await?, + ); + match first_two_events { + (ProposeEvent::SpecExec { er, .. }, ProposeEvent::AfterSync { asr }) + | (ProposeEvent::AfterSync { asr }, ProposeEvent::SpecExec { er, .. }) => { + Ok(Self::combine_er_asr(er, asr)) + } + (ProposeEvent::SpecExec { conflict_l, er }, ProposeEvent::Record { conflict }) + | (ProposeEvent::Record { conflict }, ProposeEvent::SpecExec { conflict_l, er }) => { + let require_asr = !use_fast_path || conflict | conflict_l; + Self::with_spec_exec(stream, er, require_asr).await + } + (ProposeEvent::AfterSync { asr }, ProposeEvent::Record { .. }) + | (ProposeEvent::Record { .. }, ProposeEvent::AfterSync { asr }) => { + Self::with_after_sync(stream, asr).await + } + _ => unreachable!("no other possible events"), + } + } + + /// Propose for read only commands + /// + /// For read-only commands, we only need to send propose to leader + /// + /// TODO: Provide an implementation that delegates the read index to the leader for batched + /// processing. 
+ pub(super) async fn propose_read_only( + &self, + cmd: &C, + propose_id: ProposeId, + token: Option<&String>, + use_fast_path: bool, + ) -> Result, CurpError> { + let leader_id = self.leader_id().await?; + let stream = self + .send_leader_propose(cmd, leader_id, propose_id, use_fast_path, token) + .await?; + let mut stream_pinned = Box::into_pin(stream); + if !self.send_read_index(leader_id).await { + return Err(CurpError::WrongClusterVersion(())); + } + if use_fast_path { + let event = Self::next_event(&mut stream_pinned).await?; + match event { + ProposeEvent::SpecExec { conflict_l, er } => { + Self::with_spec_exec(stream_pinned, er, conflict_l).await + } + ProposeEvent::AfterSync { asr } => Self::with_after_sync(stream_pinned, asr).await, + ProposeEvent::Record { .. } => unreachable!("leader does not returns record event"), + } + } else { + let leader_events = ( + Self::next_event(&mut stream_pinned).await?, + Self::next_event(&mut stream_pinned).await?, + ); + match leader_events { + (ProposeEvent::SpecExec { er, .. }, ProposeEvent::AfterSync { asr }) + | (ProposeEvent::AfterSync { asr }, ProposeEvent::SpecExec { er, .. 
}) => { + Ok(Self::combine_er_asr(er, asr)) + } + _ => unreachable!("no other possible events"), + } + } + } + + /// Send propose to the cluster + /// + /// Returns a stream that combines the propose stream and record request + async fn send_propose_mutative( + &self, + cmd: &C, + propose_id: ProposeId, + use_fast_path: bool, + token: Option<&String>, + ) -> Result, CurpError> { + let leader_id = self.leader_id().await?; + let leader_stream = self + .send_leader_propose(cmd, leader_id, propose_id, use_fast_path, token) + .await?; + let follower_stream = self.send_record(cmd, leader_id, propose_id).await; + let select = stream::select(Box::into_pin(leader_stream), Box::into_pin(follower_stream)); + + Ok(Box::new(select)) + } + + /// Send propose request to the leader + async fn send_leader_propose( + &self, + cmd: &C, + leader_id: ServerId, + propose_id: ProposeId, + use_fast_path: bool, + token: Option<&String>, + ) -> Result, CurpError> { + let term = self.state.term().await; + let propose_req = ProposeRequest::new::( + propose_id, + cmd, + self.state.cluster_version().await, + term, + !use_fast_path, + self.tracker.read().first_incomplete(), + ); + let timeout = self.config.propose_timeout; + let token = token.cloned(); + let stream = self + .state + .map_server(leader_id, move |conn| async move { + conn.propose_stream(propose_req, token, timeout).await + }) + .map(Self::flatten_propose_stream_result) + .map(Box::into_pin) + .flatten_stream(); + + Ok(Box::new(stream)) + } + + /// Send read index requests to the cluster + /// + /// Returns `true` if the read index is successful + async fn send_read_index(&self, leader_id: ServerId) -> bool { + let term = self.state.term().await; + let connects_len = self.state.connects_len().await; + let quorum = quorum(connects_len); + let expect = quorum.wrapping_sub(1); + let timeout = self.config.propose_timeout; + + self.state + .for_each_follower( + leader_id, + |conn| async move { conn.read_index(timeout).await }, + ) + 
.await + .filter_map(|res| future::ready(res.ok())) + .filter(|resp| future::ready(resp.get_ref().term == term)) + .take(expect) + .count() + .map(|c| c >= expect) + .await + } + + /// Send record requests to the cluster + /// + /// Returns a stream that yield a single event + async fn send_record( + &self, + cmd: &C, + leader_id: ServerId, + propose_id: ProposeId, + ) -> EventStream<'_, C> { + let connects_len = self.state.connects_len().await; + let superquorum = super_quorum(connects_len); + let timeout = self.config.propose_timeout; + let record_req = RecordRequest::new::(propose_id, cmd); + let expect = superquorum.wrapping_sub(1); + let stream = self + .state + .for_each_follower(leader_id, |conn| { + let record_req_c = record_req.clone(); + async move { conn.record(record_req_c, timeout).await } + }) + .await + .filter_map(|res| future::ready(res.ok())) + .filter(|resp| future::ready(!resp.get_ref().conflict)) + .take(expect) + .count() + .map(move |c| ProposeEvent::Record { + conflict: c < expect, + }) + .map(Ok) + .into_stream(); + + Box::new(stream) + } + + /// Flattens the result of `ConnectApi::propose_stream` + /// + /// It is considered a propose failure when the stream returns a `CurpError` + #[allow(clippy::type_complexity)] // copied from the return value of `ConnectApi::propose_stream` + fn flatten_propose_stream_result( + result: Result< + tonic::Response> + Send>>, + CurpError, + >, + ) -> EventStream<'static, C> { + match result { + Ok(stream) => { + let pinned_stream = Box::into_pin(stream.into_inner()); + Box::new( + pinned_stream.map(|r| r.map_err(CurpError::from).map(ProposeEvent::::from)), + ) + } + Err(e) => Box::new(future::ready(Err(e)).into_stream()), + } + } + + /// Combines the results of speculative execution and after-sync replication. + fn combine_er_asr( + er: Result, + asr: Result, + ) -> ProposeResponse { + er.and_then(|e| asr.map(|a| (e, Some(a)))) + } + + /// Handles speculative execution and record processing. 
+ async fn with_spec_exec( + mut stream: Pin>, + er: Result, + require_asr: bool, + ) -> Result, CurpError> { + if require_asr { + let event = Self::next_event(&mut stream).await?; + let ProposeEvent::AfterSync { asr } = event else { + unreachable!("event should only be asr"); + }; + Ok(Self::combine_er_asr(er, asr)) + } else { + Ok(er.map(|e| (e, None))) + } + } + + /// Handles after-sync and record processing. + async fn with_after_sync( + mut stream: Pin>, + asr: Result, + ) -> Result, CurpError> { + let event = Self::next_event(&mut stream).await?; + let ProposeEvent::SpecExec { er, .. } = event else { + unreachable!("event should only be er"); + }; + Ok(Self::combine_er_asr(er, asr)) + } + + /// Retrieves the next event from the stream. + async fn next_event( + stream: &mut Pin>, + ) -> Result, CurpError> { + stream + .next() + .await + .transpose()? + .ok_or(CurpError::internal("propose stream closed")) + } +} + +// Converts the propose stream response to event +// TODO: The deserialization structure need to be simplified +#[allow(clippy::expect_used)] // too verbose to write unreachables +impl From for ProposeEvent { + fn from(resp: OpResponse) -> Self { + match resp.op.expect("op should always exist") { + ResponseOp::Propose(resp) => Self::SpecExec { + conflict_l: resp.conflict, + er: resp + .map_result::(Result::transpose) + .ok() + .flatten() + .expect("er deserialization should never fail"), + }, + ResponseOp::Synced(resp) => Self::AfterSync { + asr: resp + .map_result::(|res| res) + .ok() + .flatten() + .expect("asr deserialization should never fail"), + }, + } + } +} diff --git a/crates/curp/src/response.rs b/crates/curp/src/response.rs index e6c5ca7e6..aeac5fb42 100644 --- a/crates/curp/src/response.rs +++ b/crates/curp/src/response.rs @@ -1,14 +1,8 @@ -use std::{ - pin::Pin, - sync::atomic::{AtomicBool, Ordering}, -}; +use std::sync::atomic::{AtomicBool, Ordering}; -use curp_external_api::cmd::Command; -use futures::Stream; -use 
tokio_stream::StreamExt; use tonic::Status; -use crate::rpc::{CurpError, OpResponse, ProposeResponse, ResponseOp, SyncedResponse}; +use crate::rpc::{OpResponse, ProposeResponse, ResponseOp, SyncedResponse}; /// The response sender #[derive(Debug)] @@ -58,77 +52,3 @@ impl ResponseSender { let _ignore = self.tx.try_send(Ok(resp)); } } - -/// Receiver for obtaining execution or after sync results -pub(crate) struct ResponseReceiver { - /// The response stream - resp_stream: Pin> + Send>>, -} - -impl ResponseReceiver { - /// Creates a new [`ResponseReceiver`]. - pub(crate) fn new( - resp_stream: Box> + Send>, - ) -> Self { - Self { - resp_stream: Box::into_pin(resp_stream), - } - } - - /// Receives the results - pub(crate) async fn recv( - &mut self, - both: bool, - ) -> Result), C::Error>, CurpError> { - let fst = self.recv_resp().await?; - - match fst { - ResponseOp::Propose(propose_resp) => { - let conflict = propose_resp.conflict; - let er_result = propose_resp.map_result::(|res| { - res.map(|er| er.unwrap_or_else(|| unreachable!())) - })?; - if let Err(e) = er_result { - return Ok(Err(e)); - } - if conflict || both { - let snd = self.recv_resp().await?; - let ResponseOp::Synced(synced_resp) = snd else { - unreachable!() - }; - let asr_result = synced_resp - .map_result::(|res| res.unwrap_or_else(|| unreachable!()))?; - return Ok(er_result.and_then(|er| asr_result.map(|asr| (er, Some(asr))))); - } - Ok(er_result.map(|er| (er, None))) - } - ResponseOp::Synced(synced_resp) => { - let asr_result = synced_resp - .map_result::(|res| res.unwrap_or_else(|| unreachable!()))?; - if let Err(e) = asr_result { - return Ok(Err(e)); - } - let snd = self.recv_resp().await?; - let ResponseOp::Propose(propose_resp) = snd else { - unreachable!("op: {snd:?}") - }; - let er_result = propose_resp.map_result::(|res| { - res.map(|er| er.unwrap_or_else(|| unreachable!())) - })?; - Ok(er_result.and_then(|er| asr_result.map(|asr| (er, Some(asr))))) - } - } - } - - /// Receives a single 
response from stream - async fn recv_resp(&mut self) -> Result { - let resp = self - .resp_stream - .next() - .await - .ok_or(CurpError::internal("stream reaches on an end".to_owned()))??; - Ok(resp - .op - .unwrap_or_else(|| unreachable!("op should always exist"))) - } -} From 3da9ae3a131189a908cf7b7054629e8d9e19fc4d Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:31:22 +0800 Subject: [PATCH 075/322] refactor: send both er and asr to avoid undefined behaviour Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/response.rs | 9 +++++++++ crates/curp/src/server/cmd_worker/mod.rs | 6 ++---- crates/curp/src/server/curp_node.rs | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/crates/curp/src/response.rs b/crates/curp/src/response.rs index aeac5fb42..9305e3c58 100644 --- a/crates/curp/src/response.rs +++ b/crates/curp/src/response.rs @@ -1,5 +1,6 @@ use std::sync::atomic::{AtomicBool, Ordering}; +use curp_external_api::cmd::Command; use tonic::Status; use crate::rpc::{OpResponse, ProposeResponse, ResponseOp, SyncedResponse}; @@ -51,4 +52,12 @@ impl ResponseSender { // Ignore the result because the client might close the receiving stream let _ignore = self.tx.try_send(Ok(resp)); } + + /// Sends the error result + pub(super) fn send_err(&self, err: C::Error) { + let er = ProposeResponse::new_result::(&Err(err.clone()), false); + let asr = SyncedResponse::new_result::(&Err(err)); + self.send_propose(er); + self.send_synced(asr); + } } diff --git a/crates/curp/src/server/cmd_worker/mod.rs b/crates/curp/src/server/cmd_worker/mod.rs index d70cc20e7..7ac5307b0 100644 --- a/crates/curp/src/server/cmd_worker/mod.rs +++ b/crates/curp/src/server/cmd_worker/mod.rs @@ -137,10 +137,8 @@ where cb_w.insert_asr(id, Ok(asr)); } Err(e) => { - let _ignore = tx_opt - .as_ref() - .map(|tx| tx.send_synced(SyncedResponse::new_result::(&Err(e.clone())))); - cb_w.insert_asr(id, 
Err(e.clone())); + let _ignore = tx_opt.as_ref().map(|tx| tx.send_err::(e.clone())); + cb_w.insert_asr(id, Err(e)); } } } diff --git a/crates/curp/src/server/curp_node.rs b/crates/curp/src/server/curp_node.rs index 4e1c5a552..6e89cff8f 100644 --- a/crates/curp/src/server/curp_node.rs +++ b/crates/curp/src/server/curp_node.rs @@ -323,7 +323,7 @@ impl, RC: RoleChange> CurpNode { Ok((er, None)) => { resp_tx.send_propose(ProposeResponse::new_result::(&Ok(er), false)); } - Err(e) => resp_tx.send_synced(SyncedResponse::new_result::(&Err(e))), + Err(e) => resp_tx.send_err::(e), } } } From 6b2960116d8c356d6147ce62e468b83499ce48eb Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 2 Sep 2024 10:37:52 +0800 Subject: [PATCH 076/322] test: add sleep between er and asr in tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/tests.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index f8c0649f3..e97fce4ce 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -293,6 +293,7 @@ async fn test_unary_propose_fast_path_works() { assert_eq!(id, 0, "followers should not receive propose"); let resp = async_stream::stream! { yield Ok(build_propose_response(false)); + tokio::time::sleep(Duration::from_millis(100)).await; yield Ok(build_synced_response()); }; Ok(tonic::Response::new(Box::new(resp))) @@ -540,6 +541,7 @@ async fn test_read_index_success() { assert_eq!(id, 0, "followers should not receive propose"); let resp = async_stream::stream! 
{ yield Ok(build_propose_response(false)); + tokio::time::sleep(Duration::from_millis(100)).await; yield Ok(build_synced_response()); }; Ok(tonic::Response::new(Box::new(resp))) From aa3b568a11740cfd68e20d2e26b4d38b5d7436bc Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 23 Sep 2024 10:46:40 +0800 Subject: [PATCH 077/322] chore: make the logging message more relevant Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 822da7802..214de7ec1 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -304,7 +304,7 @@ impl ClientBuilder { match self.try_discover_from(&addrs).await { Ok(()) => return Ok(self), Err(e) if matches!(e.code(), tonic::Code::Unavailable) => { - warn!("cluster is starting, sleep for {DISCOVER_SLEEP_DURATION} secs"); + warn!("cluster is unavailable, sleep for {DISCOVER_SLEEP_DURATION} secs"); tokio::time::sleep(Duration::from_secs(DISCOVER_SLEEP_DURATION)).await; } Err(e) => return Err(e), From fd409a5675effc2478df5d994243094b8100bff7 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 28 Aug 2024 20:03:01 +0800 Subject: [PATCH 078/322] feat: add auto reconnect implementation for curp client This PR add the auto reconnect implementation for curp client, as a workaround for https://github.com/hyperium/tonic/issues/1254. 
Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 68 ++---- crates/curp/src/client/retry.rs | 8 +- crates/curp/src/client/state.rs | 55 +++-- crates/curp/src/client/tests.rs | 4 +- crates/curp/src/client/unary/mod.rs | 15 +- crates/curp/src/members.rs | 2 - crates/curp/src/rpc/connect.rs | 101 +++++---- crates/curp/src/rpc/mod.rs | 3 + crates/curp/src/rpc/reconnect.rs | 198 ++++++++++++++++++ crates/curp/src/server/curp_node.rs | 21 +- crates/curp/src/server/mod.rs | 9 +- crates/curp/tests/it/common/curp_group.rs | 59 +++--- crates/curp/tests/it/server.rs | 2 - crates/simulation/src/curp_group.rs | 2 - .../tests/it/curp/server_recovery.rs | 13 +- crates/xline-client/src/lib.rs | 3 +- crates/xline/src/server/xline_server.rs | 6 +- crates/xline/tests/it/lock_test.rs | 9 +- 18 files changed, 371 insertions(+), 207 deletions(-) create mode 100644 crates/curp/src/rpc/reconnect.rs diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 214de7ec1..8cc18ca44 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -163,7 +163,7 @@ impl Drop for ProposeIdGuard<'_> { #[async_trait] trait RepeatableClientApi: ClientApi { /// Generate a unique propose id during the retry process. - fn gen_propose_id(&self) -> Result, Self::Error>; + async fn gen_propose_id(&self) -> Result, Self::Error>; /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered /// requests (event the requests are commutative). @@ -422,51 +422,23 @@ impl ClientBuilder { }) } - /// Wait for client id - async fn wait_for_client_id(&self, state: Arc) -> Result<(), tonic::Status> { - /// Max retry count for waiting for a client ID - /// - /// TODO: This retry count is set relatively high to avoid test cluster startup timeouts. - /// We should consider setting this to a more reasonable value. 
- const RETRY_COUNT: usize = 30; - /// The interval for each retry - const RETRY_INTERVAL: Duration = Duration::from_secs(1); - - for _ in 0..RETRY_COUNT { - if state.client_id() != 0 { - return Ok(()); - } - debug!("waiting for client_id"); - tokio::time::sleep(RETRY_INTERVAL).await; - } - - Err(tonic::Status::deadline_exceeded( - "timeout waiting for client id", - )) - } - /// Build the client /// /// # Errors /// /// Return `tonic::transport::Error` for connection failure. #[inline] - pub async fn build( + pub fn build( &self, ) -> Result + Send + Sync + 'static, tonic::Status> { - let state = Arc::new( - self.init_state_builder() - .build() - .await - .map_err(|e| tonic::Status::internal(e.to_string()))?, - ); + let state = Arc::new(self.init_state_builder().build()); let client = Retry::new( Unary::new(Arc::clone(&state), self.init_unary_config()), self.init_retry_config(), Some(self.spawn_bg_tasks(Arc::clone(&state))), ); - self.wait_for_client_id(state).await?; + Ok(client) } @@ -477,21 +449,14 @@ impl ClientBuilder { /// /// Return `tonic::transport::Error` for connection failure. #[inline] - pub async fn build_with_client_id( + #[must_use] + pub fn build_with_client_id( &self, - ) -> Result< - ( - impl ClientApi + Send + Sync + 'static, - Arc, - ), - tonic::Status, - > { - let state = Arc::new( - self.init_state_builder() - .build() - .await - .map_err(|e| tonic::Status::internal(e.to_string()))?, - ); + ) -> ( + impl ClientApi + Send + Sync + 'static, + Arc, + ) { + let state = Arc::new(self.init_state_builder().build()); let client = Retry::new( Unary::new(Arc::clone(&state), self.init_unary_config()), @@ -499,9 +464,8 @@ impl ClientBuilder { Some(self.spawn_bg_tasks(Arc::clone(&state))), ); let client_id = state.clone_client_id(); - self.wait_for_client_id(state).await?; - Ok((client, client_id)) + (client, client_id) } } @@ -512,22 +476,20 @@ impl ClientBuilderWithBypass

{ /// /// Return `tonic::transport::Error` for connection failure. #[inline] - pub async fn build( + pub fn build( self, ) -> Result, tonic::Status> { let state = self .inner .init_state_builder() - .build_bypassed::

(self.local_server_id, self.local_server) - .await - .map_err(|e| tonic::Status::internal(e.to_string()))?; + .build_bypassed::

(self.local_server_id, self.local_server); let state = Arc::new(state); let client = Retry::new( Unary::new(Arc::clone(&state), self.inner.init_unary_config()), self.inner.init_retry_config(), Some(self.inner.spawn_bg_tasks(Arc::clone(&state))), ); - self.inner.wait_for_client_id(state).await?; + Ok(client) } } diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 06e670a89..c67db6019 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -231,7 +231,7 @@ where use_fast_path: bool, ) -> Result, tonic::Status> { self.retry::<_, _>(|client| async move { - let propose_id = self.inner.gen_propose_id()?; + let propose_id = self.inner.gen_propose_id().await?; RepeatableClientApi::propose(client, *propose_id, cmd, token, use_fast_path).await }) .await @@ -245,7 +245,7 @@ where self.retry::<_, _>(|client| { let changes_c = changes.clone(); async move { - let propose_id = self.inner.gen_propose_id()?; + let propose_id = self.inner.gen_propose_id().await?; RepeatableClientApi::propose_conf_change(client, *propose_id, changes_c).await } }) @@ -255,7 +255,7 @@ where /// Send propose to shutdown cluster async fn propose_shutdown(&self) -> Result<(), tonic::Status> { self.retry::<_, _>(|client| async move { - let propose_id = self.inner.gen_propose_id()?; + let propose_id = self.inner.gen_propose_id().await?; RepeatableClientApi::propose_shutdown(client, *propose_id).await }) .await @@ -272,7 +272,7 @@ where let name_c = node_name.clone(); let node_client_urls_c = node_client_urls.clone(); async move { - let propose_id = self.inner.gen_propose_id()?; + let propose_id = self.inner.gen_propose_id().await?; RepeatableClientApi::propose_publish( client, *propose_id, diff --git a/crates/curp/src/client/state.rs b/crates/curp/src/client/state.rs index 8476e46b8..8a9b53081 100644 --- a/crates/curp/src/client/state.rs +++ b/crates/curp/src/client/state.rs @@ -95,7 +95,8 @@ impl State { tls_config, is_raw_curp: true, }, - 
client_id: Arc::new(AtomicU64::new(0)), + // Sets the client id to non-zero to avoid waiting for client id in tests + client_id: Arc::new(AtomicU64::new(1)), }) } @@ -146,8 +147,8 @@ impl State { }; let resp = rand_conn .fetch_cluster(FetchClusterRequest::default(), REFRESH_TIMEOUT) - .await?; - self.check_and_update(&resp.into_inner()).await?; + .await; + self.check_and_update(&resp?.into_inner()).await?; Ok(()) } @@ -327,7 +328,7 @@ impl State { .remove(&diff) .unwrap_or_else(|| unreachable!("{diff} must in new member addrs")); debug!("client connects to a new server({diff}), address({addrs:?})"); - let new_conn = rpc::connect(diff, addrs, self.immutable.tls_config.clone()).await?; + let new_conn = rpc::connect(diff, addrs, self.immutable.tls_config.clone()); let _ig = e.insert(new_conn); } else { debug!("client removes old server({diff})"); @@ -347,6 +348,30 @@ impl State { Ok(()) } + + /// Wait for client id + pub(super) async fn wait_for_client_id(&self) -> Result { + /// Max retry count for waiting for a client ID + /// + /// TODO: This retry count is set relatively high to avoid test cluster startup timeouts. + /// We should consider setting this to a more reasonable value. 
+ const RETRY_COUNT: usize = 30; + /// The interval for each retry + const RETRY_INTERVAL: Duration = Duration::from_secs(1); + + for _ in 0..RETRY_COUNT { + let client_id = self.client_id(); + if client_id != 0 { + return Ok(client_id); + } + debug!("waiting for client_id"); + tokio::time::sleep(RETRY_INTERVAL).await; + } + + Err(tonic::Status::deadline_exceeded( + "timeout waiting for client id", + )) + } } /// Builder for state @@ -395,24 +420,22 @@ impl StateBuilder { } /// Build the state with local server - pub(super) async fn build_bypassed( + pub(super) fn build_bypassed( mut self, local_server_id: ServerId, local_server: P, - ) -> Result { + ) -> State { debug!("client bypassed server({local_server_id})"); let _ig = self.all_members.remove(&local_server_id); let mut connects: HashMap<_, _> = - rpc::connects(self.all_members.clone(), self.tls_config.as_ref()) - .await? - .collect(); + rpc::connects(self.all_members.clone(), self.tls_config.as_ref()).collect(); let __ig = connects.insert( local_server_id, Arc::new(BypassedConnect::new(local_server_id, local_server)), ); - Ok(State { + State { mutable: RwLock::new(StateMut { leader: self.leader_state.map(|state| state.0), term: self.leader_state.map_or(0, |state| state.1), @@ -426,16 +449,14 @@ impl StateBuilder { is_raw_curp: self.is_raw_curp, }, client_id: Arc::new(AtomicU64::new(0)), - }) + } } /// Build the state - pub(super) async fn build(self) -> Result { + pub(super) fn build(self) -> State { let connects: HashMap<_, _> = - rpc::connects(self.all_members.clone(), self.tls_config.as_ref()) - .await? 
- .collect(); - Ok(State { + rpc::connects(self.all_members.clone(), self.tls_config.as_ref()).collect(); + State { mutable: RwLock::new(StateMut { leader: self.leader_state.map(|state| state.0), term: self.leader_state.map_or(0, |state| state.1), @@ -449,6 +470,6 @@ impl StateBuilder { is_raw_curp: self.is_raw_curp, }, client_id: Arc::new(AtomicU64::new(0)), - }) + } } } diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index e97fce4ce..39c8b88bc 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -751,7 +751,7 @@ async fn test_stream_client_keep_alive_works() { Box::pin(async move { client_id .compare_exchange( - 0, + 1, 10, std::sync::atomic::Ordering::Relaxed, std::sync::atomic::Ordering::Relaxed, @@ -775,7 +775,7 @@ async fn test_stream_client_keep_alive_on_redirect() { Box::pin(async move { client_id .compare_exchange( - 0, + 1, 10, std::sync::atomic::Ordering::Relaxed, std::sync::atomic::Ordering::Relaxed, diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 8219ec04b..90986bdb7 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -134,7 +134,7 @@ impl ClientApi for Unary { token: Option<&String>, use_fast_path: bool, ) -> Result, CurpError> { - let propose_id = self.gen_propose_id()?; + let propose_id = self.gen_propose_id().await?; RepeatableClientApi::propose(self, *propose_id, cmd, token, use_fast_path).await } @@ -143,13 +143,13 @@ impl ClientApi for Unary { &self, changes: Vec, ) -> Result, CurpError> { - let propose_id = self.gen_propose_id()?; + let propose_id = self.gen_propose_id().await?; RepeatableClientApi::propose_conf_change(self, *propose_id, changes).await } /// Send propose to shutdown cluster async fn propose_shutdown(&self) -> Result<(), CurpError> { - let propose_id = self.gen_propose_id()?; + let propose_id = self.gen_propose_id().await?; RepeatableClientApi::propose_shutdown(self, 
*propose_id).await } @@ -160,7 +160,7 @@ impl ClientApi for Unary { node_name: String, node_client_urls: Vec, ) -> Result<(), Self::Error> { - let propose_id = self.gen_propose_id()?; + let propose_id = self.gen_propose_id().await?; RepeatableClientApi::propose_publish( self, *propose_id, @@ -306,8 +306,11 @@ impl ClientApi for Unary { #[async_trait] impl RepeatableClientApi for Unary { /// Generate a unique propose id during the retry process. - fn gen_propose_id(&self) -> Result, Self::Error> { - let client_id = self.state.client_id(); + async fn gen_propose_id(&self) -> Result, Self::Error> { + let mut client_id = self.state.client_id(); + if client_id == 0 { + client_id = self.state.wait_for_client_id().await?; + }; let seq_num = self.new_seq_num(); Ok(ProposeIdGuard::new( &self.tracker, diff --git a/crates/curp/src/members.rs b/crates/curp/src/members.rs index ce2045451..5682268f1 100644 --- a/crates/curp/src/members.rs +++ b/crates/curp/src/members.rs @@ -439,8 +439,6 @@ pub async fn get_cluster_info_from_remote( let peers = init_cluster_info.peers_addrs(); let self_client_urls = init_cluster_info.self_client_urls(); let connects = rpc::connects(peers, tls_config) - .await - .ok()? 
.map(|pair| pair.1) .collect_vec(); let mut futs = connects diff --git a/crates/curp/src/rpc/connect.rs b/crates/curp/src/rpc/connect.rs index d438b6c28..c62b37d31 100644 --- a/crates/curp/src/rpc/connect.rs +++ b/crates/curp/src/rpc/connect.rs @@ -11,7 +11,7 @@ use async_trait::async_trait; use bytes::BytesMut; use clippy_utilities::NumericCast; use engine::SnapshotApi; -use futures::{stream::FuturesUnordered, Stream}; +use futures::Stream; #[cfg(test)] use mockall::automock; use tokio::sync::Mutex; @@ -42,6 +42,7 @@ use crate::{ use super::{ proto::commandpb::{ReadIndexRequest, ReadIndexResponse}, + reconnect::Reconnect, OpResponse, RecordRequest, RecordResponse, }; @@ -69,85 +70,79 @@ impl FromTonicChannel for InnerProtocolClient { } } -/// Connect to a server -async fn connect_to( +/// Creates a new connection +fn connect_to( id: ServerId, addrs: Vec, tls_config: Option, -) -> Result>, tonic::transport::Error> { - let (channel, change_tx) = Channel::balance_channel(DEFAULT_BUFFER_SIZE); +) -> Connect { + let (channel, change_tx) = Channel::balance_channel(DEFAULT_BUFFER_SIZE.max(addrs.len())); for addr in &addrs { - let endpoint = build_endpoint(addr, tls_config.as_ref())?; - let _ig = change_tx - .send(tower::discover::Change::Insert(addr.clone(), endpoint)) - .await; + let endpoint = build_endpoint(addr, tls_config.as_ref()) + .unwrap_or_else(|_| unreachable!("address is ill-formatted")); + change_tx + .try_send(tower::discover::Change::Insert(addr.clone(), endpoint)) + .unwrap_or_else(|_| unreachable!("unknown channel tx send error")); } let client = Client::from_channel(channel); - let connect = Arc::new(Connect { + Connect { id, rpc_connect: client, change_tx, addrs: Mutex::new(addrs), tls_config, - }); - Ok(connect) + } } -/// Connect to a map of members -async fn connect_all( - members: HashMap>, - tls_config: Option<&ClientTlsConfig>, -) -> Result>)>, tonic::transport::Error> { - let conns_to: FuturesUnordered<_> = members - .into_iter() - .map(|(id, 
addrs)| async move { - connect_to::(id, addrs, tls_config.cloned()) - .await - .map(|conn| (id, conn)) - }) - .collect(); - futures::StreamExt::collect::>(conns_to) - .await - .into_iter() - .collect::, _>>() +/// Creates a new connection with auto reconnect +fn new_reconnect( + id: ServerId, + addrs: Vec, + tls_config: Option, +) -> Reconnect>> { + Reconnect::new(Box::new(move || { + connect_to(id, addrs.clone(), tls_config.clone()) + })) } /// A wrapper of [`connect_to`], hide the detailed [`Connect`] -pub(crate) async fn connect( +pub(crate) fn connect( id: ServerId, addrs: Vec, tls_config: Option, -) -> Result, tonic::transport::Error> { - let conn = connect_to::>(id, addrs, tls_config).await?; - Ok(conn) +) -> Arc { + let conn = new_reconnect(id, addrs, tls_config); + Arc::new(conn) } /// Wrapper of [`connect_all`], hide the details of [`Connect`] -pub(crate) async fn connects( +pub(crate) fn connects( members: HashMap>, tls_config: Option<&ClientTlsConfig>, -) -> Result)>, tonic::transport::Error> { - // It seems that casting high-rank types cannot be inferred, so we allow trivial_casts to cast manually - #[allow(trivial_casts)] - #[allow(clippy::as_conversions)] - let conns = connect_all(members, tls_config) - .await? +) -> impl Iterator)> { + let tls_config = tls_config.cloned(); + members .into_iter() - .map(|(id, conn)| (id, conn as Arc)); - Ok(conns) + .map(move |(id, addrs)| (id, connect(id, addrs, tls_config.clone()))) } /// Wrapper of [`connect_all`], hide the details of [`Connect`] -pub(crate) async fn inner_connects( +pub(crate) fn inner_connects( members: HashMap>, tls_config: Option<&ClientTlsConfig>, -) -> Result, tonic::transport::Error> { - let conns = connect_all(members, tls_config) - .await? 
- .into_iter() - .map(|(id, conn)| (id, InnerConnectApiWrapper::new_from_arc(conn))); - Ok(conns) +) -> impl Iterator { + let tls_config = tls_config.cloned(); + members.into_iter().map(move |(id, addrs)| { + ( + id, + InnerConnectApiWrapper::new_from_arc(Arc::new(connect_to::< + InnerProtocolClient, + >( + id, addrs, tls_config.clone() + ))), + ) + }) } /// Connect interface between server and clients @@ -282,13 +277,13 @@ impl InnerConnectApiWrapper { } /// Create a new `InnerConnectApiWrapper` from id and addrs - pub(crate) async fn connect( + pub(crate) fn connect( id: ServerId, addrs: Vec, tls_config: Option, - ) -> Result { - let conn = connect_to::>(id, addrs, tls_config).await?; - Ok(InnerConnectApiWrapper::new_from_arc(conn)) + ) -> Self { + let conn = connect_to::>(id, addrs, tls_config); + InnerConnectApiWrapper::new_from_arc(Arc::new(conn)) } } diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index c064c3bb0..10c56fa99 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -67,6 +67,9 @@ mod metrics; pub(crate) mod connect; pub(crate) use connect::{connect, connects, inner_connects}; +/// Auto reconnect connection +mod reconnect; + // Skip for generated code #[allow( clippy::all, diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs new file mode 100644 index 000000000..e392db38a --- /dev/null +++ b/crates/curp/src/rpc/reconnect.rs @@ -0,0 +1,198 @@ +use std::{ + sync::{atomic::AtomicU64, Arc}, + time::Duration, +}; + +use async_trait::async_trait; +use event_listener::Event; +use futures::Stream; + +use crate::{ + members::ServerId, + rpc::{ + connect::ConnectApi, CurpError, FetchClusterRequest, FetchClusterResponse, + FetchReadStateRequest, FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, + OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeRequest, + PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, RecordResponse, + 
ShutdownRequest, ShutdownResponse, + }, +}; + +/// Auto reconnect of a connection +pub(super) struct Reconnect { + /// Connect id + id: ServerId, + /// The connection + connect: tokio::sync::RwLock>, + /// The connect builder + builder: Box C + Send + Sync + 'static>, + /// Signal to abort heartbeat + event: Event, +} + +impl Reconnect { + /// Creates a new `Reconnect` + pub(crate) fn new(builder: Box C + Send + Sync + 'static>) -> Self { + let init_connect = builder(); + Self { + id: init_connect.id(), + connect: tokio::sync::RwLock::new(Some(init_connect)), + builder, + event: Event::new(), + } + } + + /// Creating a new connection to replace the current + async fn reconnect(&self) { + let new_connect = (self.builder)(); + // Cancel the leader keep alive loop task because it hold a read lock + let _cancel = self.event.notify(1); + let _ignore = self.connect.write().await.replace(new_connect); + // After connection is updated, notify to start the keep alive loop + let _continue = self.event.notify(1); + } + + /// Try to reconnect if the result is `Err` + async fn try_reconnect(&self, result: Result) -> Result { + // TODO: use `tonic::Status` instead of `CurpError`, we can't tell + // if a reconnect is required from `CurpError`. + if matches!( + result, + Err(CurpError::RpcTransport(()) | CurpError::Internal(_)) + ) { + tracing::info!("client reconnecting"); + self.reconnect().await; + } + result + } +} + +/// Execute with reconnect +macro_rules! 
execute_with_reconnect { + ($self:expr, $trait_method:path, $($arg:expr),*) => {{ + let result = { + let connect = $self.connect.read().await; + let connect_ref = connect.as_ref().unwrap(); + ($trait_method)(connect_ref, $($arg),*).await + }; + $self.try_reconnect(result).await + }}; +} + +#[allow(clippy::unwrap_used, clippy::unwrap_in_result)] +#[async_trait] +impl ConnectApi for Reconnect { + /// Get server id + fn id(&self) -> ServerId { + self.id + } + + /// Update server addresses, the new addresses will override the old ones + async fn update_addrs(&self, addrs: Vec) -> Result<(), tonic::transport::Error> { + let connect = self.connect.read().await; + connect.as_ref().unwrap().update_addrs(addrs).await + } + + /// Send `ProposeRequest` + async fn propose_stream( + &self, + request: ProposeRequest, + token: Option, + timeout: Duration, + ) -> Result< + tonic::Response> + Send>>, + CurpError, + > { + execute_with_reconnect!(self, ConnectApi::propose_stream, request, token, timeout) + } + + /// Send `RecordRequest` + async fn record( + &self, + request: RecordRequest, + timeout: Duration, + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::record, request, timeout) + } + + /// Send `ReadIndexRequest` + async fn read_index( + &self, + timeout: Duration, + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::read_index, timeout) + } + + /// Send `ProposeRequest` + async fn propose_conf_change( + &self, + request: ProposeConfChangeRequest, + timeout: Duration, + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::propose_conf_change, request, timeout) + } + + /// Send `PublishRequest` + async fn publish( + &self, + request: PublishRequest, + timeout: Duration, + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::publish, request, timeout) + } + + /// Send `ShutdownRequest` + async fn shutdown( + &self, + request: ShutdownRequest, + timeout: Duration, + ) -> Result, CurpError> { + 
execute_with_reconnect!(self, ConnectApi::shutdown, request, timeout) + } + + /// Send `FetchClusterRequest` + async fn fetch_cluster( + &self, + request: FetchClusterRequest, + timeout: Duration, + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::fetch_cluster, request, timeout) + } + + /// Send `FetchReadStateRequest` + async fn fetch_read_state( + &self, + request: FetchReadStateRequest, + timeout: Duration, + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::fetch_read_state, request, timeout) + } + + /// Send `MoveLeaderRequest` + async fn move_leader( + &self, + request: MoveLeaderRequest, + timeout: Duration, + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::move_leader, request, timeout) + } + + /// Keep send lease keep alive to server and mutate the client id + async fn lease_keep_alive(&self, client_id: Arc, interval: Duration) -> CurpError { + loop { + let connect = self.connect.read().await; + let connect_ref = connect.as_ref().unwrap(); + tokio::select! { + err = connect_ref.lease_keep_alive(Arc::clone(&client_id), interval) => { + return err; + } + _empty = self.event.listen() => {}, + } + // Creates the listener before dropping the read lock. + // This prevents us from losting the event. 
+ let listener = self.event.listen(); + drop(connect); + let _connection_updated = listener.await; + } + } +} diff --git a/crates/curp/src/server/curp_node.rs b/crates/curp/src/server/curp_node.rs index 6e89cff8f..95a4d15f4 100644 --- a/crates/curp/src/server/curp_node.rs +++ b/crates/curp/src/server/curp_node.rs @@ -690,19 +690,11 @@ impl, RC: RoleChange> CurpNode { }; match change.change_type() { ConfChangeType::Add | ConfChangeType::AddLearner => { - let connect = match InnerConnectApiWrapper::connect( + let connect = InnerConnectApiWrapper::connect( change.node_id, change.address, curp.client_tls_config().cloned(), - ) - .await - { - Ok(connect) => connect, - Err(e) => { - error!("connect to {} failed, {}", change.node_id, e); - continue; - } - }; + ); curp.insert_connect(connect.clone()); let sync_event = curp.sync_event(change.node_id); let remove_event = Arc::new(Event::new()); @@ -842,7 +834,8 @@ impl, RC: RoleChange> CurpNode { /// Create a new server instance #[inline] #[allow(clippy::too_many_arguments)] // TODO: refactor this use builder pattern - pub(super) async fn new( + #[allow(clippy::needless_pass_by_value)] // The value should be consumed + pub(super) fn new( cluster_info: Arc, is_leader: bool, cmd_executor: Arc, @@ -860,10 +853,8 @@ impl, RC: RoleChange> CurpNode { .into_iter() .map(|server_id| (server_id, Arc::new(Event::new()))) .collect(); - let connects = rpc::inner_connects(cluster_info.peers_addrs(), client_tls_config.as_ref()) - .await - .map_err(|e| CurpError::internal(format!("parse peers addresses failed, err {e:?}")))? 
- .collect(); + let connects = + rpc::inner_connects(cluster_info.peers_addrs(), client_tls_config.as_ref()).collect(); let cmd_board = Arc::new(RwLock::new(CommandBoard::new())); let lease_manager = Arc::new(RwLock::new(LeaseManager::new())); let last_applied = cmd_executor diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 10e4b23f4..4e6806495 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -264,7 +264,7 @@ impl, RC: RoleChange> Rpc { /// Panic if storage creation failed #[inline] #[allow(clippy::too_many_arguments)] // TODO: refactor this use builder pattern - pub async fn new( + pub fn new( cluster_info: Arc, is_leader: bool, executor: Arc, @@ -290,9 +290,7 @@ impl, RC: RoleChange> Rpc { client_tls_config, sps, ucps, - ) - .await - { + ) { Ok(n) => n, Err(err) => { panic!("failed to create curp service, {err:?}"); @@ -346,8 +344,7 @@ impl, RC: RoleChange> Rpc { client_tls_config, sps, ucps, - ) - .await; + ); tonic::transport::Server::builder() .add_service(ProtocolServer::new(server.clone())) diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index 8fe32ae18..3afe7bd8d 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -136,22 +136,19 @@ impl CurpGroup { let role_change_cb = TestRoleChange::default(); let role_change_arc = role_change_cb.get_inner_arc(); let curp_storage = Arc::new(DB::open(&config.engine_cfg).unwrap()); - let server = Arc::new( - Rpc::new( - cluster_info, - name == leader_name, - ce, - snapshot_allocator, - role_change_cb, - config, - curp_storage, - Arc::clone(&task_manager), - client_tls_config.clone(), - vec![Box::::default()], - vec![Box::::default()], - ) - .await, - ); + let server = Arc::new(Rpc::new( + cluster_info, + name == leader_name, + ce, + snapshot_allocator, + role_change_cb, + config, + curp_storage, + Arc::clone(&task_manager), + client_tls_config.clone(), + 
vec![Box::::default()], + vec![Box::::default()], + )); task_manager.spawn(TaskName::TonicServer, |n| async move { let ig = Self::run(server, listener, n).await; }); @@ -268,22 +265,19 @@ impl CurpGroup { let role_change_cb = TestRoleChange::default(); let role_change_arc = role_change_cb.get_inner_arc(); let curp_storage = Arc::new(DB::open(&config.engine_cfg).unwrap()); - let server = Arc::new( - Rpc::new( - cluster_info, - false, - ce, - snapshot_allocator, - role_change_cb, - config, - curp_storage, - Arc::clone(&task_manager), - self.client_tls_config.clone(), - vec![], - vec![], - ) - .await, - ); + let server = Arc::new(Rpc::new( + cluster_info, + false, + ce, + snapshot_allocator, + role_change_cb, + config, + curp_storage, + Arc::clone(&task_manager), + self.client_tls_config.clone(), + vec![], + vec![], + )); task_manager.spawn(TaskName::TonicServer, |n| async move { let _ig = Self::run(server, listener, n).await; }); @@ -329,7 +323,6 @@ impl CurpGroup { .await .unwrap() .build() - .await .unwrap() } diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index 9eeb5878a..019440e5f 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -437,7 +437,6 @@ async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_leader() .leader_state(follower_id, 0) .all_members(group.all_addrs_map()) .build::() - .await .unwrap(); client.propose_shutdown().await.unwrap(); @@ -459,7 +458,6 @@ async fn propose_conf_change_to_follower() { .leader_state(follower_id, 0) .all_members(group.all_addrs_map()) .build::() - .await .unwrap(); let node_id = group.nodes.keys().next().copied().unwrap(); diff --git a/crates/simulation/src/curp_group.rs b/crates/simulation/src/curp_group.rs index e9d3aebe0..aafcf627c 100644 --- a/crates/simulation/src/curp_group.rs +++ b/crates/simulation/src/curp_group.rs @@ -195,10 +195,8 @@ impl CurpGroup { ClientBuilder::new(config, true) .all_members(all_members) .build_with_client_id() - 
.await }) .await - .unwrap() .unwrap(); SimClient { inner: Arc::new(client), diff --git a/crates/simulation/tests/it/curp/server_recovery.rs b/crates/simulation/tests/it/curp/server_recovery.rs index 7e8a88ccf..3e8c85125 100644 --- a/crates/simulation/tests/it/curp/server_recovery.rs +++ b/crates/simulation/tests/it/curp/server_recovery.rs @@ -457,6 +457,13 @@ async fn recovery_after_compaction() { async fn overwritten_config_should_fallback() { init_logger(); let group = CurpGroup::new(5).await; + let client = group.new_client().await; + // A workaround for dedup. The client will lazily acquire an id from the leader during a + // propose. + let _wait_for_client_id = client + .propose(TestCommand::new_put(vec![0], 0), false) + .await; + let client_id = client.client_id(); let leader1 = group.get_leader().await.0; for node in group.nodes.values().filter(|node| node.id != leader1) { group.disable_node(node.id); @@ -468,13 +475,13 @@ async fn overwritten_config_should_fallback() { let node_id = 123; let address = vec!["127.0.0.1:4567".to_owned()]; let changes = vec![ConfChange::add(node_id, address)]; - let client = group.new_client().await; let res = leader_conn .propose_conf_change( ProposeConfChangeRequest { propose_id: Some(PbProposeId { - client_id: client.client_id(), - seq_num: 0, + client_id, + // start from 1 as we already propose an put with seq_num = 0 + seq_num: 1, }), changes, cluster_version: cluster.cluster_version, diff --git a/crates/xline-client/src/lib.rs b/crates/xline-client/src/lib.rs index 3bc638ba2..b3fd70ed3 100644 --- a/crates/xline-client/src/lib.rs +++ b/crates/xline-client/src/lib.rs @@ -244,8 +244,7 @@ impl Client { .tls_config(options.tls_config) .discover_from(addrs) .await? 
- .build::() - .await?, + .build::()?, ) as Arc; let id_gen = Arc::new(lease_gen::LeaseIdGenerator::new()); diff --git a/crates/xline/src/server/xline_server.rs b/crates/xline/src/server/xline_server.rs index 73a8a4ac6..655953c79 100644 --- a/crates/xline/src/server/xline_server.rs +++ b/crates/xline/src/server/xline_server.rs @@ -518,8 +518,7 @@ impl XlineServer { self.client_tls_config.clone(), XlineSpeculativePools::new(Arc::clone(&lease_collection)).into_inner(), XlineUncommittedPools::new(lease_collection).into_inner(), - ) - .await; + ); let client = Arc::new( CurpClientBuilder::new(*self.cluster_config.client_config(), false) @@ -527,8 +526,7 @@ impl XlineServer { .cluster_version(self.cluster_info.cluster_version()) .all_members(self.cluster_info.all_members_peer_urls()) .bypass(self.cluster_info.self_id(), curp_server.clone()) - .build::() - .await?, + .build::()?, ) as Arc; if let Some(compactor) = auto_compactor_c { diff --git a/crates/xline/tests/it/lock_test.rs b/crates/xline/tests/it/lock_test.rs index d89231f03..29dc9a19b 100644 --- a/crates/xline/tests/it/lock_test.rs +++ b/crates/xline/tests/it/lock_test.rs @@ -1,4 +1,4 @@ -use std::{error::Error, time::Duration}; +use std::{error::Error, sync::Arc, time::Duration}; use test_macros::abort_on_panic; use tokio::time::{sleep, Instant}; @@ -11,17 +11,20 @@ async fn test_lock() -> Result<(), Box> { cluster.start().await; let client = cluster.client().await; let lock_client = client.lock_client(); + let event = Arc::new(event_listener::Event::new()); let lock_handle = tokio::spawn({ let c = lock_client.clone(); + let event = Arc::clone(&event); async move { let mut xutex = Xutex::new(c, "test", None, None).await.unwrap(); let _lock = xutex.lock_unsafe().await.unwrap(); - sleep(Duration::from_secs(3)).await; + let _notified = event.notify(1); + sleep(Duration::from_secs(2)).await; } }); - sleep(Duration::from_secs(1)).await; + event.listen().await; let now = Instant::now(); let mut xutex = 
Xutex::new(lock_client, "test", None, None).await?; From d3ba15147d32a00096184428dd41224552e8af36 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 3 Sep 2024 10:49:26 +0800 Subject: [PATCH 079/322] refactor: reimplement curp client state Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary/config.rs | 47 ++++++++++++++++++++++++++ crates/curp/src/client/unary/mod.rs | 8 +++++ crates/curp/src/client/unary/state.rs | 46 +++++++++++++++++++++++++ 3 files changed, 101 insertions(+) create mode 100644 crates/curp/src/client/unary/config.rs create mode 100644 crates/curp/src/client/unary/state.rs diff --git a/crates/curp/src/client/unary/config.rs b/crates/curp/src/client/unary/config.rs new file mode 100644 index 000000000..061662dae --- /dev/null +++ b/crates/curp/src/client/unary/config.rs @@ -0,0 +1,47 @@ +use std::time::Duration; + +use tonic::transport::ClientTlsConfig; + +use crate::members::ServerId; + +/// Client config +#[derive(Debug, Clone)] +pub(crate) struct Config { + /// Local server id, should be initialized on startup + local_server: Option, + /// Client tls config + tls_config: Option, + /// The rpc timeout of a propose request + propose_timeout: Duration, + /// The rpc timeout of a 2-RTT request, usually takes longer than propose timeout + /// + /// The recommended the values is within (propose_timeout, 2 * propose_timeout]. 
+ wait_synced_timeout: Duration, +} + +impl Config { + /// Get the local server id + pub(crate) fn local_server(&self) -> Option { + self.local_server + } + + /// Get the client TLS config + pub(crate) fn tls_config(&self) -> Option<&ClientTlsConfig> { + self.tls_config.as_ref() + } + + /// Get the propose timeout + pub(crate) fn propose_timeout(&self) -> Duration { + self.propose_timeout + } + + /// Get the wait synced timeout + pub(crate) fn wait_synced_timeout(&self) -> Duration { + self.wait_synced_timeout + } + + /// Returns `true` if the current client is on the server + pub(crate) fn is_raw_curp(&self) -> bool { + self.local_server.is_some() + } +} diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 90986bdb7..743c7d35d 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -1,6 +1,14 @@ /// Client propose implementation mod propose_impl; +#[allow(unused)] +/// State of the unary client +mod state; + +#[allow(unused)] +/// Config of the client +mod config; + use std::{ cmp::Ordering, marker::PhantomData, diff --git a/crates/curp/src/client/unary/state.rs b/crates/curp/src/client/unary/state.rs new file mode 100644 index 000000000..c85994566 --- /dev/null +++ b/crates/curp/src/client/unary/state.rs @@ -0,0 +1,46 @@ +use std::{collections::HashMap, sync::Arc}; + +use crate::{members::ServerId, rpc::connect::ConnectApi}; + +/// The cluster state +/// +/// The client must discover the cluster info before sending any propose +struct ClusterState { + /// Leader id. + leader: ServerId, + /// Term, initialize to 0, calibrated by the server. + term: u64, + /// Cluster version, initialize to 0, calibrated by the server. + cluster_version: u64, + /// Members' connect, calibrated by the server. 
+ connects: HashMap>, +} + +impl std::fmt::Debug for ClusterState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("State") + .field("leader", &self.leader) + .field("term", &self.term) + .field("cluster_version", &self.cluster_version) + .field("connects", &self.connects.keys()) + .finish() + } +} + +impl ClusterState { + /// Updates the current leader + fn update_leader(&mut self, leader: ServerId, term: u64) { + self.leader = leader; + self.term = term; + } + + /// Updates the cluster + fn update_cluster( + &mut self, + cluster_version: u64, + connects: HashMap>, + ) { + self.cluster_version = cluster_version; + self.connects = connects; + } +} From 0ed2d5b34dab82433043a3c19d388648cf349a95 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 3 Sep 2024 14:38:54 +0800 Subject: [PATCH 080/322] refactor: add new to `Config` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary/config.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/crates/curp/src/client/unary/config.rs b/crates/curp/src/client/unary/config.rs index 061662dae..41ef0a9e3 100644 --- a/crates/curp/src/client/unary/config.rs +++ b/crates/curp/src/client/unary/config.rs @@ -20,6 +20,21 @@ pub(crate) struct Config { } impl Config { + /// Creates a new `Config` + pub(crate) fn new( + local_server: Option, + tls_config: Option, + propose_timeout: Duration, + wait_synced_timeout: Duration, + ) -> Self { + Self { + local_server, + tls_config, + propose_timeout, + wait_synced_timeout, + } + } + /// Get the local server id pub(crate) fn local_server(&self) -> Option { self.local_server From dae5e11dea040d3d6198027e4a42a0deee4c61b6 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 3 Sep 2024 14:58:10 +0800 Subject: [PATCH 081/322] feat: implement map functions for `ClusterState` Signed-off-by: bsbds 
<69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary/state.rs | 53 ++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/crates/curp/src/client/unary/state.rs b/crates/curp/src/client/unary/state.rs index c85994566..65af14f53 100644 --- a/crates/curp/src/client/unary/state.rs +++ b/crates/curp/src/client/unary/state.rs @@ -1,6 +1,11 @@ use std::{collections::HashMap, sync::Arc}; -use crate::{members::ServerId, rpc::connect::ConnectApi}; +use futures::{stream::FuturesUnordered, Future}; + +use crate::{ + members::ServerId, + rpc::{connect::ConnectApi, CurpError}, +}; /// The cluster state /// @@ -28,6 +33,52 @@ impl std::fmt::Debug for ClusterState { } impl ClusterState { + /// Take an async function and map to the dedicated server, return None + /// if the server can not found in local state + pub(crate) fn map_server>>( + &self, + id: ServerId, + f: impl FnOnce(Arc) -> F, + ) -> Option { + // If the leader id cannot be found in connects, it indicates that there is + // an inconsistency between the client's local leader state and the cluster + // state, then mock a `WrongClusterVersion` return to the outside. + self.connects.get(&id).map(Arc::clone).map(f) + } + + /// Take an async function and map to the dedicated server, return None + /// if the server can not found in local state + pub(crate) fn map_leader>>( + &self, + f: impl FnOnce(Arc) -> F, + ) -> Option { + // If the leader id cannot be found in connects, it indicates that there is + // an inconsistency between the client's local leader state and the cluster + // state, then mock a `WrongClusterVersion` return to the outside. 
+ self.connects.get(&self.leader).map(Arc::clone).map(f) + } + + /// Take an async function and map to all server, returning `FuturesUnordered` + pub(crate) fn for_each_server>( + &self, + f: impl FnMut(Arc) -> F, + ) -> FuturesUnordered { + self.connects.values().map(Arc::clone).map(f).collect() + } + + /// Take an async function and map to all server, returning `FuturesUnordered` + pub(crate) fn for_each_follower>( + &self, + f: impl FnMut(Arc) -> F, + ) -> FuturesUnordered { + self.connects + .iter() + .filter_map(|(id, conn)| (*id != self.leader).then_some(conn)) + .map(Arc::clone) + .map(f) + .collect() + } + /// Updates the current leader fn update_leader(&mut self, leader: ServerId, term: u64) { self.leader = leader; From 699004d056b34c7824b1010f38a4e1642749cdf2 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:03:33 +0800 Subject: [PATCH 082/322] feat: implement `get_quorum` method for `ClusterState` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary/state.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crates/curp/src/client/unary/state.rs b/crates/curp/src/client/unary/state.rs index 65af14f53..67bbfca3c 100644 --- a/crates/curp/src/client/unary/state.rs +++ b/crates/curp/src/client/unary/state.rs @@ -79,6 +79,15 @@ impl ClusterState { .collect() } + /// Returns the quorum size based on the given quorum function + /// + /// NOTE: Do not update the cluster in between an `for_each_xxx` and an `get_quorum`, which may + /// lead to inconsistent quorum. 
+ pub(crate) fn get_quorum usize>(&self, mut quorum: Q) -> usize { + let cluster_size = self.connects.len(); + quorum(cluster_size) + } + /// Updates the current leader fn update_leader(&mut self, leader: ServerId, term: u64) { self.leader = leader; From 0e676859ed19c80360e06469f9dadaefe04f3932 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:18:09 +0800 Subject: [PATCH 083/322] chore: rename state.rs to cluster_state.rs Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary/{state.rs => cluster_state.rs} | 0 crates/curp/src/client/unary/mod.rs | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename crates/curp/src/client/unary/{state.rs => cluster_state.rs} (100%) diff --git a/crates/curp/src/client/unary/state.rs b/crates/curp/src/client/unary/cluster_state.rs similarity index 100% rename from crates/curp/src/client/unary/state.rs rename to crates/curp/src/client/unary/cluster_state.rs diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 743c7d35d..f06ec8d8f 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -3,7 +3,7 @@ mod propose_impl; #[allow(unused)] /// State of the unary client -mod state; +mod cluster_state; #[allow(unused)] /// Config of the client From 9eb11d97d8f3123491f7c29ffda3dc3f1c28642f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 3 Sep 2024 16:29:33 +0800 Subject: [PATCH 084/322] fix: curp client config Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary/config.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/curp/src/client/unary/config.rs b/crates/curp/src/client/unary/config.rs index 41ef0a9e3..cc149d966 100644 --- a/crates/curp/src/client/unary/config.rs +++ b/crates/curp/src/client/unary/config.rs @@ -5,7 +5,7 @@ use 
tonic::transport::ClientTlsConfig; use crate::members::ServerId; /// Client config -#[derive(Debug, Clone)] +#[derive(Default, Debug, Clone)] pub(crate) struct Config { /// Local server id, should be initialized on startup local_server: Option, @@ -17,6 +17,8 @@ pub(crate) struct Config { /// /// The recommended the values is within (propose_timeout, 2 * propose_timeout]. wait_synced_timeout: Duration, + /// is current client send request to raw curp server + is_raw_curp: bool, } impl Config { @@ -26,12 +28,14 @@ impl Config { tls_config: Option, propose_timeout: Duration, wait_synced_timeout: Duration, + is_raw_curp: bool, ) -> Self { Self { local_server, tls_config, propose_timeout, wait_synced_timeout, + is_raw_curp, } } @@ -57,6 +61,6 @@ impl Config { /// Returns `true` if the current client is on the server pub(crate) fn is_raw_curp(&self) -> bool { - self.local_server.is_some() + self.is_raw_curp } } From 648518508d608140f98ed661b031ba6e1fdb63c8 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 3 Sep 2024 16:51:19 +0800 Subject: [PATCH 085/322] feat: reimplement fetch cluster Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary/cluster_state.rs | 12 +- crates/curp/src/client/unary/fetch_impl.rs | 104 ++++++++++++++++++ crates/curp/src/client/unary/mod.rs | 17 +++ 3 files changed, 130 insertions(+), 3 deletions(-) create mode 100644 crates/curp/src/client/unary/fetch_impl.rs diff --git a/crates/curp/src/client/unary/cluster_state.rs b/crates/curp/src/client/unary/cluster_state.rs index 67bbfca3c..0b9c3c763 100644 --- a/crates/curp/src/client/unary/cluster_state.rs +++ b/crates/curp/src/client/unary/cluster_state.rs @@ -10,7 +10,8 @@ use crate::{ /// The cluster state /// /// The client must discover the cluster info before sending any propose -struct ClusterState { +#[derive(Default)] +pub(crate) struct ClusterState { /// Leader id. 
leader: ServerId, /// Term, initialize to 0, calibrated by the server. @@ -88,14 +89,19 @@ impl ClusterState { quorum(cluster_size) } + /// Returns the term of the cluster + pub(crate) fn term(&self) -> u64 { + self.term + } + /// Updates the current leader - fn update_leader(&mut self, leader: ServerId, term: u64) { + pub(crate) fn update_leader(&mut self, leader: ServerId, term: u64) { self.leader = leader; self.term = term; } /// Updates the cluster - fn update_cluster( + pub(crate) fn update_cluster( &mut self, cluster_version: u64, connects: HashMap>, diff --git a/crates/curp/src/client/unary/fetch_impl.rs b/crates/curp/src/client/unary/fetch_impl.rs new file mode 100644 index 000000000..5990c7547 --- /dev/null +++ b/crates/curp/src/client/unary/fetch_impl.rs @@ -0,0 +1,104 @@ +use std::{collections::HashMap, sync::Arc, time::Duration}; + +use curp_external_api::cmd::Command; +use futures::{future, FutureExt, StreamExt}; +use tonic::Response; +use tracing::warn; +use utils::parking_lot_lock::RwLockMap; + +use crate::{ + quorum, + rpc::{self, connect::ConnectApi, CurpError, FetchClusterRequest, FetchClusterResponse}, +}; + +use super::Unary; + +impl Unary { + /// Fetch cluster and updates the current state + pub(super) async fn fetch_cluster1(&self) -> Result<(), CurpError> { + /// Retry interval + const FETCH_RETRY_INTERVAL: Duration = Duration::from_secs(1); + loop { + let resp = self + .pre_fetch() + .await + .ok_or(CurpError::internal("cluster not available"))?; + let new_members = self.member_addrs(&resp); + let new_connects = self.connect_to(new_members); + self.cluster_state + .write() + .update_cluster(resp.cluster_version, new_connects); + if self.fetch_term().await { + return Ok(()); + } + warn!("Fetch cluster failed, sleep for {FETCH_RETRY_INTERVAL:?}"); + tokio::time::sleep(FETCH_RETRY_INTERVAL).await; + } + } + + /// Fetch the term of the cluster. This ensures that the current leader is the latest. 
+ async fn fetch_term(&self) -> bool { + let timeout = self.client_config.wait_synced_timeout(); + self.cluster_state + .map_read(|state| { + let term = state.term(); + let quorum = state.get_quorum(quorum); + self.cluster_state + .read() + .for_each_server(|c| async move { + c.fetch_cluster(FetchClusterRequest { linearizable: true }, timeout) + .await + }) + .filter_map(|r| future::ready(r.ok())) + .map(Response::into_inner) + .filter(move |resp| future::ready(resp.term == term)) + .take(quorum) + .count() + .map(move |t| t >= quorum) + }) + .await + } + + /// Prefetch, send fetch cluster request to the cluster and get the + /// config with the greatest quorum. + async fn pre_fetch(&self) -> Option { + let timeout = self.client_config.wait_synced_timeout(); + let requests = self.cluster_state.read().for_each_server(|c| async move { + c.fetch_cluster(FetchClusterRequest { linearizable: true }, timeout) + .await + }); + let responses: Vec<_> = requests + .filter_map(|r| future::ready(r.ok())) + .map(Response::into_inner) + .collect() + .await; + responses + .into_iter() + .filter(|resp| resp.leader_id.is_some()) + .filter(|resp| !resp.members.is_empty()) + .max_by(|x, y| x.term.cmp(&y.term)) + } + + /// Gets the member addresses to connect to + fn member_addrs(&self, resp: &FetchClusterResponse) -> HashMap> { + if self.client_config.is_raw_curp() { + resp.clone().into_peer_urls() + } else { + resp.clone().into_client_urls() + } + } + + /// Connect to the given addrs + fn connect_to( + &self, + new_members: HashMap>, + ) -> HashMap> { + new_members + .into_iter() + .map(|(id, addrs)| { + let tls_config = self.client_config.tls_config().cloned(); + (id, rpc::connect(id, addrs, tls_config)) + }) + .collect() + } +} diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index f06ec8d8f..6dee35be9 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -1,6 +1,10 @@ /// Client propose implementation 
mod propose_impl; +#[allow(unused)] +/// Client cluster fetch implementation +mod fetch_impl; + #[allow(unused)] /// State of the unary client mod cluster_state; @@ -23,6 +27,8 @@ use parking_lot::RwLock; use tonic::Response; use tracing::{debug, warn}; +use self::{cluster_state::ClusterState, config::Config}; + use super::{ state::State, ClientApi, LeaderStateUpdate, ProposeIdGuard, ProposeResponse, RepeatableClientApi, @@ -72,6 +78,13 @@ pub(super) struct Unary { last_sent_seq: AtomicU64, /// marker phantom: PhantomData, + + #[allow(dead_code)] + /// Cluster state + cluster_state: RwLock, + #[allow(dead_code)] + /// Cluster state + client_config: Config, } impl Unary { @@ -83,6 +96,10 @@ impl Unary { tracker: RwLock::new(Tracker::default()), last_sent_seq: AtomicU64::new(0), phantom: PhantomData, + + // TODO: build cluster state + cluster_state: RwLock::default(), + client_config: Config::default(), } } From 6e7032670dadcd54a022d9c062c1d0b238e46f0b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 3 Sep 2024 18:47:26 +0800 Subject: [PATCH 086/322] chore: move cluster_state to upper level Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/{unary => }/cluster_state.rs | 0 crates/curp/src/client/mod.rs | 4 ++++ crates/curp/src/client/unary/mod.rs | 10 +++------- 3 files changed, 7 insertions(+), 7 deletions(-) rename crates/curp/src/client/{unary => }/cluster_state.rs (100%) diff --git a/crates/curp/src/client/unary/cluster_state.rs b/crates/curp/src/client/cluster_state.rs similarity index 100% rename from crates/curp/src/client/unary/cluster_state.rs rename to crates/curp/src/client/cluster_state.rs diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 8cc18ca44..8e0d0f440 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -17,6 +17,10 @@ mod retry; /// State for clients mod state; +#[allow(unused)] +/// State of the cluster +mod 
cluster_state; + /// Tests for client #[cfg(test)] mod tests; diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 6dee35be9..0b728df25 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -5,10 +5,6 @@ mod propose_impl; /// Client cluster fetch implementation mod fetch_impl; -#[allow(unused)] -/// State of the unary client -mod cluster_state; - #[allow(unused)] /// Config of the client mod config; @@ -27,11 +23,11 @@ use parking_lot::RwLock; use tonic::Response; use tracing::{debug, warn}; -use self::{cluster_state::ClusterState, config::Config}; +use self::config::Config; use super::{ - state::State, ClientApi, LeaderStateUpdate, ProposeIdGuard, ProposeResponse, - RepeatableClientApi, + cluster_state::ClusterState, state::State, ClientApi, LeaderStateUpdate, ProposeIdGuard, + ProposeResponse, RepeatableClientApi, }; use crate::{ members::ServerId, From 4e8de14d664cc74c27a3330f5b988da27a48bc25 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 3 Sep 2024 18:48:29 +0800 Subject: [PATCH 087/322] chore: move fetch_impl to upper level --- crates/curp/src/client/{unary/fetch_impl.rs => fetch.rs} | 0 crates/curp/src/client/mod.rs | 4 ++++ crates/curp/src/client/unary/mod.rs | 4 ---- 3 files changed, 4 insertions(+), 4 deletions(-) rename crates/curp/src/client/{unary/fetch_impl.rs => fetch.rs} (100%) diff --git a/crates/curp/src/client/unary/fetch_impl.rs b/crates/curp/src/client/fetch.rs similarity index 100% rename from crates/curp/src/client/unary/fetch_impl.rs rename to crates/curp/src/client/fetch.rs diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 8e0d0f440..91be0bc5e 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -21,6 +21,10 @@ mod state; /// State of the cluster mod cluster_state; +#[allow(unused)] +/// Client cluster fetch implementation +mod fetch; + /// Tests for client 
#[cfg(test)] mod tests; diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 0b728df25..821c7113c 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -1,10 +1,6 @@ /// Client propose implementation mod propose_impl; -#[allow(unused)] -/// Client cluster fetch implementation -mod fetch_impl; - #[allow(unused)] /// Config of the client mod config; From ae27b19b29def4fa37c4bf429f3a0ccce27d5f5f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 3 Sep 2024 18:52:57 +0800 Subject: [PATCH 088/322] chore: move config.rs to upper level Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/{unary => }/config.rs | 0 crates/curp/src/client/mod.rs | 4 ++++ crates/curp/src/client/unary/mod.rs | 10 ++-------- 3 files changed, 6 insertions(+), 8 deletions(-) rename crates/curp/src/client/{unary => }/config.rs (100%) diff --git a/crates/curp/src/client/unary/config.rs b/crates/curp/src/client/config.rs similarity index 100% rename from crates/curp/src/client/unary/config.rs rename to crates/curp/src/client/config.rs diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 91be0bc5e..f759f18b9 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -25,6 +25,10 @@ mod cluster_state; /// Client cluster fetch implementation mod fetch; +#[allow(unused)] +/// Config of the client +mod config; + /// Tests for client #[cfg(test)] mod tests; diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 821c7113c..41f510e80 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -1,10 +1,6 @@ /// Client propose implementation mod propose_impl; -#[allow(unused)] -/// Config of the client -mod config; - use std::{ cmp::Ordering, marker::PhantomData, @@ -19,11 +15,9 @@ use parking_lot::RwLock; use tonic::Response; use 
tracing::{debug, warn}; -use self::config::Config; - use super::{ - cluster_state::ClusterState, state::State, ClientApi, LeaderStateUpdate, ProposeIdGuard, - ProposeResponse, RepeatableClientApi, + cluster_state::ClusterState, config::Config, state::State, ClientApi, LeaderStateUpdate, + ProposeIdGuard, ProposeResponse, RepeatableClientApi, }; use crate::{ members::ServerId, From 1f2b54034b96dc711e60d5b484affff4410307bb Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 3 Sep 2024 18:51:36 +0800 Subject: [PATCH 089/322] refactor: client fetch Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 17 +++++- crates/curp/src/client/fetch.rs | 81 +++++++++++++++---------- 2 files changed, 65 insertions(+), 33 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 0b9c3c763..12f5a53f8 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -10,7 +10,7 @@ use crate::{ /// The cluster state /// /// The client must discover the cluster info before sending any propose -#[derive(Default)] +#[derive(Default, Clone)] pub(crate) struct ClusterState { /// Leader id. 
leader: ServerId, @@ -34,6 +34,21 @@ impl std::fmt::Debug for ClusterState { } impl ClusterState { + /// Creates a new `ClusterState` + pub(crate) fn new( + leader: ServerId, + term: u64, + cluster_version: u64, + connects: HashMap>, + ) -> Self { + Self { + leader, + term, + cluster_version, + connects, + } + } + /// Take an async function and map to the dedicated server, return None /// if the server can not found in local state pub(crate) fn map_server>>( diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 5990c7547..1b7f4e187 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -2,6 +2,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration}; use curp_external_api::cmd::Command; use futures::{future, FutureExt, StreamExt}; +use parking_lot::RwLock; use tonic::Response; use tracing::warn; use utils::parking_lot_lock::RwLockMap; @@ -11,25 +12,45 @@ use crate::{ rpc::{self, connect::ConnectApi, CurpError, FetchClusterRequest, FetchClusterResponse}, }; -use super::Unary; +use super::cluster_state::ClusterState; +use super::config::Config; + +/// Fetch cluster implementation +struct Fetch { + /// The fetch config + config: Config, +} + +impl Fetch { + /// Creates a new `Fetch` + pub(crate) fn new(config: Config) -> Self { + Self { config } + } -impl Unary { /// Fetch cluster and updates the current state - pub(super) async fn fetch_cluster1(&self) -> Result<(), CurpError> { + pub(crate) async fn fetch_cluster( + &self, + state: ClusterState, + ) -> Result { /// Retry interval const FETCH_RETRY_INTERVAL: Duration = Duration::from_secs(1); loop { let resp = self - .pre_fetch() + .pre_fetch(&state) .await .ok_or(CurpError::internal("cluster not available"))?; let new_members = self.member_addrs(&resp); let new_connects = self.connect_to(new_members); - self.cluster_state - .write() - .update_cluster(resp.cluster_version, new_connects); - if self.fetch_term().await { - return Ok(()); + let 
new_state = ClusterState::new( + resp.leader_id + .unwrap_or_else(|| unreachable!("leader id should be Some")) + .into(), + resp.term, + resp.cluster_version, + new_connects, + ); + if self.fetch_term(&new_state).await { + return Ok(new_state); } warn!("Fetch cluster failed, sleep for {FETCH_RETRY_INTERVAL:?}"); tokio::time::sleep(FETCH_RETRY_INTERVAL).await; @@ -37,33 +58,29 @@ impl Unary { } /// Fetch the term of the cluster. This ensures that the current leader is the latest. - async fn fetch_term(&self) -> bool { - let timeout = self.client_config.wait_synced_timeout(); - self.cluster_state - .map_read(|state| { - let term = state.term(); - let quorum = state.get_quorum(quorum); - self.cluster_state - .read() - .for_each_server(|c| async move { - c.fetch_cluster(FetchClusterRequest { linearizable: true }, timeout) - .await - }) - .filter_map(|r| future::ready(r.ok())) - .map(Response::into_inner) - .filter(move |resp| future::ready(resp.term == term)) - .take(quorum) - .count() - .map(move |t| t >= quorum) + async fn fetch_term(&self, state: &ClusterState) -> bool { + let timeout = self.config.wait_synced_timeout(); + let term = state.term(); + let quorum = state.get_quorum(quorum); + state + .for_each_server(|c| async move { + c.fetch_cluster(FetchClusterRequest { linearizable: true }, timeout) + .await }) + .filter_map(|r| future::ready(r.ok())) + .map(Response::into_inner) + .filter(move |resp| future::ready(resp.term == term)) + .take(quorum) + .count() + .map(move |t| t >= quorum) .await } /// Prefetch, send fetch cluster request to the cluster and get the /// config with the greatest quorum. 
- async fn pre_fetch(&self) -> Option { - let timeout = self.client_config.wait_synced_timeout(); - let requests = self.cluster_state.read().for_each_server(|c| async move { + async fn pre_fetch(&self, state: &ClusterState) -> Option { + let timeout = self.config.wait_synced_timeout(); + let requests = state.for_each_server(|c| async move { c.fetch_cluster(FetchClusterRequest { linearizable: true }, timeout) .await }); @@ -81,7 +98,7 @@ impl Unary { /// Gets the member addresses to connect to fn member_addrs(&self, resp: &FetchClusterResponse) -> HashMap> { - if self.client_config.is_raw_curp() { + if self.config.is_raw_curp() { resp.clone().into_peer_urls() } else { resp.clone().into_client_urls() @@ -96,7 +113,7 @@ impl Unary { new_members .into_iter() .map(|(id, addrs)| { - let tls_config = self.client_config.tls_config().cloned(); + let tls_config = self.config.tls_config().cloned(); (id, rpc::connect(id, addrs, tls_config)) }) .collect() From 30c997802087a36e3063f7affd4deb500eebb032 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 4 Sep 2024 09:22:45 +0800 Subject: [PATCH 090/322] chore: remove mutable methods from ClusterState Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 12f5a53f8..41aee8178 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -108,20 +108,4 @@ impl ClusterState { pub(crate) fn term(&self) -> u64 { self.term } - - /// Updates the current leader - pub(crate) fn update_leader(&mut self, leader: ServerId, term: u64) { - self.leader = leader; - self.term = term; - } - - /// Updates the cluster - pub(crate) fn update_cluster( - &mut self, - cluster_version: u64, - connects: HashMap>, - ) { - self.cluster_version = cluster_version; - self.connects 
= connects; - } } From b185824aae0c7a65c9b2b560e2097a5b0ce33d9f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 4 Sep 2024 11:31:57 +0800 Subject: [PATCH 091/322] refactor: map_leader will always returns a future Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 41aee8178..cdfe72b47 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -67,11 +67,13 @@ impl ClusterState { pub(crate) fn map_leader>>( &self, f: impl FnOnce(Arc) -> F, - ) -> Option { + ) -> F { // If the leader id cannot be found in connects, it indicates that there is // an inconsistency between the client's local leader state and the cluster // state, then mock a `WrongClusterVersion` return to the outside. - self.connects.get(&self.leader).map(Arc::clone).map(f) + f(Arc::clone(self.connects.get(&self.leader).unwrap_or_else( + || unreachable!("leader connect should always exists"), + ))) } /// Take an async function and map to all server, returning `FuturesUnordered` From a41d0c95a7d6d2ce81fc4d8cce924f08f6376fa1 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 4 Sep 2024 11:17:51 +0800 Subject: [PATCH 092/322] refactor: curp client lease keep alive Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/fetch.rs | 3 +- crates/curp/src/client/keep_alive.rs | 104 ++++++ crates/curp/src/client/mod.rs | 32 +- crates/curp/src/client/retry.rs | 202 ++++++----- crates/curp/src/client/tests.rs | 483 ++++++++++++++------------- crates/curp/src/rpc/connect.rs | 36 +- crates/curp/src/rpc/reconnect.rs | 32 +- 7 files changed, 525 insertions(+), 367 deletions(-) create mode 100644 crates/curp/src/client/keep_alive.rs diff --git 
a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 1b7f4e187..e9af6d884 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -16,7 +16,8 @@ use super::cluster_state::ClusterState; use super::config::Config; /// Fetch cluster implementation -struct Fetch { +#[derive(Debug, Default, Clone)] +pub(crate) struct Fetch { /// The fetch config config: Config, } diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs new file mode 100644 index 000000000..3bcc77a04 --- /dev/null +++ b/crates/curp/src/client/keep_alive.rs @@ -0,0 +1,104 @@ +use std::{ + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + time::Duration, +}; + +use event_listener::Event; +use futures::Future; +use parking_lot::RwLock; +use tokio::{sync::broadcast, task::JoinHandle}; +use tracing::{debug, info, warn}; + +use super::{cluster_state::ClusterState, state::State}; +use crate::rpc::{connect::ConnectApi, CurpError, Redirect}; + +/// Keep alive +#[derive(Clone, Debug)] +pub(crate) struct KeepAlive { + /// Heartbeat interval + heartbeat_interval: Duration, +} + +/// Handle of the keep alive task +#[derive(Debug)] +pub(crate) struct KeepAliveHandle { + /// Client id + client_id: Arc, + /// Update event of client id + update_event: Arc, + /// Task join handle + handle: JoinHandle<()>, +} + +impl KeepAliveHandle { + /// Wait for the client id + pub(crate) async fn wait_id_update(&self, current_id: u64) -> u64 { + loop { + let id = self.client_id.load(Ordering::Relaxed); + if current_id != id { + return id; + } + self.update_event.listen().await; + } + } +} + +impl KeepAlive { + /// Creates a new `KeepAlive` + pub(crate) fn new(heartbeat_interval: Duration) -> Self { + Self { heartbeat_interval } + } + + /// Streaming keep alive + pub(crate) fn spawn_keep_alive( + self, + cluster_state: Arc>, + ) -> KeepAliveHandle { + /// Sleep duration when keep alive failed + const FAIL_SLEEP_DURATION: Duration = 
Duration::from_secs(1); + let client_id = Arc::new(AtomicU64::new(0)); + let client_id_c = Arc::clone(&client_id); + let update_event = Arc::new(Event::new()); + let update_event_c = Arc::clone(&update_event); + let handle = tokio::spawn(async move { + loop { + let current_state = cluster_state.read().clone(); + let current_id = client_id.load(Ordering::Relaxed); + match self.keep_alive_with(current_id, current_state).await { + Ok(new_id) => { + client_id.store(new_id, Ordering::Relaxed); + let _ignore = update_event.notify(usize::MAX); + } + Err(e) => { + warn!("keep alive failed: {e:?}"); + // Sleep for some time, the cluster state should be updated in a while + tokio::time::sleep(FAIL_SLEEP_DURATION).await; + } + } + } + }); + + KeepAliveHandle { + client_id: client_id_c, + update_event: update_event_c, + handle, + } + } + + /// Keep alive with the given state and config + pub(crate) async fn keep_alive_with( + &self, + client_id: u64, + cluster_state: ClusterState, + ) -> Result { + cluster_state + .map_leader(|conn| async move { + conn.lease_keep_alive(client_id, self.heartbeat_interval) + .await + }) + .await + } +} diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index f759f18b9..6f054967a 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -8,12 +8,15 @@ mod metrics; /// Unary rpc client mod unary; +#[cfg(ignore)] /// Stream rpc client mod stream; +#[allow(unused)] /// Retry layer mod retry; +#[allow(unused)] /// State for clients mod state; @@ -29,6 +32,10 @@ mod fetch; /// Config of the client mod config; +#[allow(unused)] +/// Lease keep alive implementation +mod keep_alive; + /// Tests for client #[cfg(test)] mod tests; @@ -41,7 +48,6 @@ use async_trait::async_trait; use curp_external_api::cmd::Command; use futures::{stream::FuturesUnordered, StreamExt}; use parking_lot::RwLock; -use tokio::task::JoinHandle; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; use tracing::{debug, warn}; @@ 
-50,6 +56,8 @@ use utils::ClientTlsConfig; use utils::{build_endpoint, config::ClientConfig}; use self::{ + fetch::Fetch, + keep_alive::KeepAlive, retry::{Retry, RetryConfig}, state::StateBuilder, unary::{Unary, UnaryConfig}, @@ -424,16 +432,6 @@ impl ClientBuilder { ) } - /// Spawn background tasks for the client - fn spawn_bg_tasks(&self, state: Arc) -> JoinHandle<()> { - let interval = *self.config.keep_alive_interval(); - tokio::spawn(async move { - let stream = stream::Streaming::new(state, stream::StreamingConfig::new(interval)); - stream.keep_heartbeat().await; - debug!("keep heartbeat task shutdown"); - }) - } - /// Build the client /// /// # Errors @@ -445,10 +443,14 @@ impl ClientBuilder { ) -> Result + Send + Sync + 'static, tonic::Status> { let state = Arc::new(self.init_state_builder().build()); + let keep_alive = KeepAlive::new(*self.config.keep_alive_interval()); + // TODO: build the fetch object + let fetch = Fetch::default(); let client = Retry::new( Unary::new(Arc::clone(&state), self.init_unary_config()), self.init_retry_config(), - Some(self.spawn_bg_tasks(Arc::clone(&state))), + keep_alive, + fetch, ); Ok(client) @@ -496,10 +498,14 @@ impl ClientBuilderWithBypass

{ .init_state_builder() .build_bypassed::

(self.local_server_id, self.local_server); let state = Arc::new(state); + let keep_alive = KeepAlive::new(*self.inner.config.keep_alive_interval()); + // TODO: build the fetch object + let fetch = Fetch::default(); let client = Retry::new( Unary::new(Arc::clone(&state), self.inner.init_unary_config()), self.inner.init_retry_config(), - Some(self.inner.spawn_bg_tasks(Arc::clone(&state))), + keep_alive, + fetch, ); Ok(client) diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index c67db6019..387e4dbdb 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -1,14 +1,19 @@ -use std::{ops::SubAssign, time::Duration}; +use std::{ops::SubAssign, sync::Arc, time::Duration}; use async_trait::async_trait; use futures::Future; -use tokio::task::JoinHandle; -use tracing::{info, warn}; - -use super::{ClientApi, LeaderStateUpdate, ProposeResponse, RepeatableClientApi}; +use parking_lot::RwLock; +use tracing::warn; + +use super::{ + cluster_state::ClusterState, + fetch::Fetch, + keep_alive::{KeepAlive, KeepAliveHandle}, + ClientApi, LeaderStateUpdate, ProposeResponse, RepeatableClientApi, +}; use crate::{ members::ServerId, - rpc::{ConfChange, CurpError, FetchClusterResponse, Member, ReadState, Redirect}, + rpc::{ConfChange, CurpError, FetchClusterResponse, Member, ReadState}, }; /// Backoff config @@ -95,6 +100,35 @@ impl Backoff { } } +/// The context of a retry +#[derive(Debug)] +pub(crate) struct Context { + /// The current client id + client_id: u64, + /// The current cluster state + cluster_state: ClusterState, +} + +impl Context { + /// Creates a new `Context` + pub(crate) fn new(client_id: u64, cluster_state: ClusterState) -> Self { + Self { + client_id, + cluster_state, + } + } + + /// Returns the current client id + pub(crate) fn client_id(&self) -> u64 { + self.client_id + } + + /// Returns the current client id + pub(crate) fn cluster_state(&self) -> ClusterState { + self.cluster_state.clone() + } +} + /// The 
retry client automatically retry the requests of the inner client api /// which raises the [`tonic::Status`] error #[derive(Debug)] @@ -102,18 +136,13 @@ pub(super) struct Retry { /// Inner client inner: Api, /// Retry config - config: RetryConfig, - /// Background task handle - bg_handle: Option>, -} - -impl Drop for Retry { - fn drop(&mut self) { - if let Some(handle) = self.bg_handle.as_ref() { - info!("stopping background task"); - handle.abort(); - } - } + retry_config: RetryConfig, + /// Cluster state + cluster_state: Arc>, + /// Keep alive client + keep_alive: KeepAliveHandle, + /// Fetch cluster object + fetch: Fetch, } impl Retry @@ -121,77 +150,49 @@ where Api: RepeatableClientApi + LeaderStateUpdate + Send + Sync + 'static, { /// Create a retry client - pub(super) fn new(inner: Api, config: RetryConfig, bg_handle: Option>) -> Self { + pub(super) fn new( + inner: Api, + retry_config: RetryConfig, + keep_alive: KeepAlive, + fetch: Fetch, + ) -> Self { + // TODO: build state from parameters + let cluster_state = Arc::new(RwLock::default()); + let keep_alive_handle = keep_alive.spawn_keep_alive(Arc::clone(&cluster_state)); Self { inner, - config, - bg_handle, + retry_config, + cluster_state, + keep_alive: keep_alive_handle, + fetch, } } /// Takes a function f and run retry. - async fn retry<'a, R, F>(&'a self, f: impl Fn(&'a Api) -> F) -> Result + async fn retry<'a, R, F>( + &'a self, + f: impl Fn(&'a Api, Context) -> F, + ) -> Result where F: Future>, { - let mut backoff = self.config.init_backoff(); + let mut backoff = self.retry_config.init_backoff(); let mut last_err = None; + let client_id = self.keep_alive.wait_id_update(0).await; while let Some(delay) = backoff.next_delay() { - let err = match f(&self.inner).await { + let cluster_state = self.cluster_state.read().clone(); + let context = Context::new(client_id, cluster_state.clone()); + let result = tokio::select! 
{ + result = f(&self.inner, context) => result, + _ = self.keep_alive.wait_id_update(client_id) => { + return Err(CurpError::expired_client_id().into()); + }, + }; + let err = match result { Ok(res) => return Ok(res), Err(err) => err, }; - - match err { - // some errors that should not retry - CurpError::Duplicated(()) - | CurpError::ShuttingDown(()) - | CurpError::InvalidConfig(()) - | CurpError::NodeNotExists(()) - | CurpError::NodeAlreadyExists(()) - | CurpError::LearnerNotCatchUp(()) => { - return Err(tonic::Status::from(err)); - } - - // some errors that could have a retry - CurpError::ExpiredClientId(()) - | CurpError::KeyConflict(()) - | CurpError::Internal(_) - | CurpError::LeaderTransfer(_) => {} - - // update leader state if we got a rpc transport error - CurpError::RpcTransport(()) => { - if let Err(e) = self.inner.fetch_leader_id(true).await { - warn!("fetch leader failed, error {e:?}"); - } - } - - // update the cluster state if got WrongClusterVersion - CurpError::WrongClusterVersion(()) => { - // the inner client should automatically update cluster state when fetch_cluster - if let Err(e) = self.inner.fetch_cluster(true).await { - warn!("fetch cluster failed, error {e:?}"); - } - } - - // update the leader state if got Redirect - CurpError::Redirect(Redirect { - ref leader_id, - term, - }) => { - let _ig = self - .inner - .update_leader(leader_id.as_ref().map(Into::into), term) - .await; - } - - // update the cluster state if got Zombie - CurpError::Zombie(()) => { - if let Err(e) = self.inner.fetch_cluster(true).await { - warn!("fetch cluster failed, error {e:?}"); - } - } - } + self.handle_err(&err, cluster_state).await?; #[cfg(feature = "client-metrics")] super::metrics::get().client_retry_count.add(1, &[]); @@ -209,6 +210,43 @@ where last_err.unwrap_or_else(|| unreachable!("last error must be set")) ))) } + + /// Handles errors before another retry + async fn handle_err( + &self, + err: &CurpError, + cluster_state: ClusterState, + ) -> Result<(), 
tonic::Status> { + match *err { + // some errors that should not retry + CurpError::Duplicated(()) + | CurpError::ShuttingDown(()) + | CurpError::InvalidConfig(()) + | CurpError::NodeNotExists(()) + | CurpError::NodeAlreadyExists(()) + | CurpError::LearnerNotCatchUp(()) => { + return Err(tonic::Status::from(err.clone())); + } + + // some errors that could have a retry + CurpError::ExpiredClientId(()) + | CurpError::KeyConflict(()) + | CurpError::Internal(_) + | CurpError::LeaderTransfer(_) => {} + + // Some error that needs to update cluster state + CurpError::RpcTransport(()) + | CurpError::WrongClusterVersion(()) + | CurpError::Redirect(_) // FIXME: The redirect error needs to include full cluster state + | CurpError::Zombie(()) => { + let new_cluster_state = self.fetch.fetch_cluster(cluster_state).await?; + // TODO: Prevent concurrent updating cluster state + *self.cluster_state.write() = new_cluster_state; + } + } + + Ok(()) + } } #[async_trait] @@ -230,7 +268,7 @@ where token: Option<&String>, use_fast_path: bool, ) -> Result, tonic::Status> { - self.retry::<_, _>(|client| async move { + self.retry::<_, _>(|client, _ctx| async move { let propose_id = self.inner.gen_propose_id().await?; RepeatableClientApi::propose(client, *propose_id, cmd, token, use_fast_path).await }) @@ -242,7 +280,7 @@ where &self, changes: Vec, ) -> Result, tonic::Status> { - self.retry::<_, _>(|client| { + self.retry::<_, _>(|client, _ctx| { let changes_c = changes.clone(); async move { let propose_id = self.inner.gen_propose_id().await?; @@ -254,7 +292,7 @@ where /// Send propose to shutdown cluster async fn propose_shutdown(&self) -> Result<(), tonic::Status> { - self.retry::<_, _>(|client| async move { + self.retry::<_, _>(|client, _ctx| async move { let propose_id = self.inner.gen_propose_id().await?; RepeatableClientApi::propose_shutdown(client, *propose_id).await }) @@ -268,7 +306,7 @@ where node_name: String, node_client_urls: Vec, ) -> Result<(), Self::Error> { - self.retry::<_, 
_>(|client| { + self.retry::<_, _>(|client, _ctx| { let name_c = node_name.clone(); let node_client_urls_c = node_client_urls.clone(); async move { @@ -288,13 +326,13 @@ where /// Send move leader request async fn move_leader(&self, node_id: u64) -> Result<(), Self::Error> { - self.retry::<_, _>(|client| client.move_leader(node_id)) + self.retry::<_, _>(|client, _ctx| client.move_leader(node_id)) .await } /// Send fetch read state from leader async fn fetch_read_state(&self, cmd: &Self::Cmd) -> Result { - self.retry::<_, _>(|client| client.fetch_read_state(cmd)) + self.retry::<_, _>(|client, _ctx| client.fetch_read_state(cmd)) .await } @@ -306,7 +344,7 @@ where &self, linearizable: bool, ) -> Result { - self.retry::<_, _>(|client| client.fetch_cluster(linearizable)) + self.retry::<_, _>(|client, _ctx| client.fetch_cluster(linearizable)) .await } } diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 39c8b88bc..1ec9b7971 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -1,36 +1,32 @@ use std::{ collections::HashMap, - sync::{atomic::AtomicU64, Arc, Mutex}, + sync::{Arc, Mutex}, time::{Duration, Instant}, }; use curp_test_utils::test_cmd::{LogIndexResult, TestCommand, TestCommandResult}; -use futures::{future::BoxFuture, Stream}; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; -use tonic::Status; use tracing_test::traced_test; #[cfg(madsim)] use utils::ClientTlsConfig; use super::{ state::State, - stream::{Streaming, StreamingConfig}, unary::{Unary, UnaryConfig}, }; use crate::{ client::{ + fetch::Fetch, + keep_alive::KeepAlive, retry::{Retry, RetryConfig}, ClientApi, }, members::ServerId, rpc::{ connect::{ConnectApi, MockConnectApi}, - CurpError, FetchClusterRequest, FetchClusterResponse, FetchReadStateRequest, - FetchReadStateResponse, Member, MoveLeaderRequest, MoveLeaderResponse, OpResponse, - ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeRequest, ProposeResponse, - 
PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, RecordResponse, - ResponseOp, ShutdownRequest, ShutdownResponse, SyncedResponse, + CurpError, FetchClusterResponse, Member, OpResponse, ProposeResponse, ReadIndexResponse, + RecordResponse, ResponseOp, SyncedResponse, }, }; @@ -472,7 +468,8 @@ async fn test_retry_propose_return_no_retry_error() { let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(100), 5), - None, + KeepAlive::new(Duration::from_secs(1)), + Fetch::default(), ); let err = retry .propose(&TestCommand::new_put(vec![1], 1), None, false) @@ -522,7 +519,8 @@ async fn test_retry_propose_return_retry_error() { let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(10), 5), - None, + KeepAlive::new(Duration::from_secs(1)), + Fetch::default(), ); let err = retry .propose(&TestCommand::new_put(vec![1], 1), None, false) @@ -595,242 +593,263 @@ async fn test_read_index_fail() { assert!(res.is_err()); } -// Tests for stream client - -struct MockedStreamConnectApi { - id: ServerId, - lease_keep_alive_handle: - Box) -> BoxFuture<'static, CurpError> + Send + Sync + 'static>, -} - -#[async_trait::async_trait] -impl ConnectApi for MockedStreamConnectApi { - /// Get server id - fn id(&self) -> ServerId { - self.id - } - - /// Update server addresses, the new addresses will override the old ones - async fn update_addrs(&self, _addrs: Vec) -> Result<(), tonic::transport::Error> { - Ok(()) - } - - /// Send `ProposeRequest` - async fn propose_stream( - &self, - _request: ProposeRequest, - _token: Option, - _timeout: Duration, - ) -> Result> + Send>>, CurpError> - { - unreachable!("please use MockedConnectApi") - } - - /// Send `RecordRequest` - async fn record( - &self, - _request: RecordRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } +// TODO: rewrite these tests +#[cfg(ignore)] +mod test_stream { + use super::*; - /// Send `ReadIndexRequest` - async 
fn read_index( - &self, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } + // Tests for stream client - /// Send `ProposeConfChange` - async fn propose_conf_change( - &self, - _request: ProposeConfChangeRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") + struct MockedStreamConnectApi { + id: ServerId, + lease_keep_alive_handle: + Box BoxFuture<'static, Result> + Send + Sync + 'static>, } - /// Send `PublishRequest` - async fn publish( - &self, - _request: PublishRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") + #[async_trait::async_trait] + impl ConnectApi for MockedStreamConnectApi { + /// Get server id + fn id(&self) -> ServerId { + self.id + } + + /// Update server addresses, the new addresses will override the old ones + async fn update_addrs(&self, _addrs: Vec) -> Result<(), tonic::transport::Error> { + Ok(()) + } + + /// Send `ProposeRequest` + async fn propose_stream( + &self, + _request: ProposeRequest, + _token: Option, + _timeout: Duration, + ) -> Result< + tonic::Response> + Send>>, + CurpError, + > { + unreachable!("please use MockedConnectApi") + } + + /// Send `RecordRequest` + async fn record( + &self, + _request: RecordRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Send `ReadIndexRequest` + async fn read_index( + &self, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Send `ProposeConfChange` + async fn propose_conf_change( + &self, + _request: ProposeConfChangeRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Send `PublishRequest` + async fn publish( + &self, + _request: PublishRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// 
Send `ShutdownRequest` + async fn shutdown( + &self, + _request: ShutdownRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Send `FetchClusterRequest` + async fn fetch_cluster( + &self, + _request: FetchClusterRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Send `FetchReadStateRequest` + async fn fetch_read_state( + &self, + _request: FetchReadStateRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Send `MoveLeaderRequest` + async fn move_leader( + &self, + _request: MoveLeaderRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Keep send lease keep alive to server and mutate the client id + async fn lease_keep_alive( + &self, + client_id: u64, + _interval: Duration, + ) -> Result { + (self.lease_keep_alive_handle)(client_id).await + } } - /// Send `ShutdownRequest` - async fn shutdown( - &self, - _request: ShutdownRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") + /// Create mocked stream connects + /// + /// The leader is S0 + #[allow(trivial_casts)] // cannot be inferred + fn init_mocked_stream_connects( + size: usize, + leader_idx: usize, + leader_term: u64, + keep_alive_handle: impl Fn(u64) -> BoxFuture<'static, Result> + + Send + + Sync + + 'static, + ) -> HashMap> { + let mut keep_alive_handle = Some(keep_alive_handle); + let redirect_handle = move |_id| { + Box::pin(async move { + Err(CurpError::redirect( + Some(leader_idx as ServerId), + leader_term, + )) + }) as BoxFuture<'static, Result> + }; + (0..size) + .map(|id| MockedStreamConnectApi { + id: id as ServerId, + lease_keep_alive_handle: if id == leader_idx { + Box::new(keep_alive_handle.take().unwrap()) + } else { + Box::new(redirect_handle) + }, + }) + .enumerate() + .map(|(id, api)| 
(id as ServerId, Arc::new(api) as Arc)) + .collect() } - /// Send `FetchClusterRequest` - async fn fetch_cluster( - &self, - _request: FetchClusterRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") + /// Create stream client for test + fn init_stream_client( + connects: HashMap>, + local_server: Option, + leader: Option, + term: u64, + cluster_version: u64, + ) -> Streaming { + let state = State::new_arc(connects, local_server, leader, term, cluster_version, None); + Streaming::new(state, StreamingConfig::new(Duration::from_secs(1))) } - /// Send `FetchReadStateRequest` - async fn fetch_read_state( - &self, - _request: FetchReadStateRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") + #[traced_test] + #[tokio::test] + async fn test_stream_client_keep_alive_works() { + let connects = init_mocked_stream_connects(5, 0, 1, move |client_id| { + Box::pin(async move { + client_id + .compare_exchange( + 1, + 10, + std::sync::atomic::Ordering::Relaxed, + std::sync::atomic::Ordering::Relaxed, + ) + .unwrap(); + tokio::time::sleep(Duration::from_secs(30)).await; + unreachable!("test timeout") + }) + }); + let stream = init_stream_client(connects, None, Some(0), 1, 1); + tokio::time::timeout(Duration::from_millis(100), stream.keep_heartbeat()) + .await + .unwrap_err(); + assert_eq!(stream.state.client_id(), 10); } - /// Send `MoveLeaderRequest` - async fn move_leader( - &self, - _request: MoveLeaderRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") + #[traced_test] + #[tokio::test] + async fn test_stream_client_keep_alive_on_redirect() { + let connects = init_mocked_stream_connects(5, 0, 2, move |client_id| { + Box::pin(async move { + client_id + .compare_exchange( + 1, + 10, + std::sync::atomic::Ordering::Relaxed, + std::sync::atomic::Ordering::Relaxed, + ) + .unwrap(); + 
tokio::time::sleep(Duration::from_secs(30)).await; + unreachable!("test timeout") + }) + }); + let stream = init_stream_client(connects, None, Some(1), 1, 1); + tokio::time::timeout(Duration::from_millis(100), stream.keep_heartbeat()) + .await + .unwrap_err(); + assert_eq!(stream.state.client_id(), 10); } - /// Keep send lease keep alive to server and mutate the client id - async fn lease_keep_alive(&self, client_id: Arc, _interval: Duration) -> CurpError { - (self.lease_keep_alive_handle)(Arc::clone(&client_id)).await + #[traced_test] + #[tokio::test] + async fn test_stream_client_keep_alive_hang_up_on_bypassed() { + let connects = init_mocked_stream_connects(5, 0, 1, |_client_id| { + Box::pin( + async move { panic!("should not invoke lease_keep_alive in bypassed connection") }, + ) + }); + let stream = init_stream_client(connects, Some(0), Some(0), 1, 1); + tokio::time::timeout(Duration::from_millis(100), stream.keep_heartbeat()) + .await + .unwrap_err(); + assert_ne!(stream.state.client_id(), 0); } -} - -/// Create mocked stream connects -/// -/// The leader is S0 -#[allow(trivial_casts)] // cannot be inferred -fn init_mocked_stream_connects( - size: usize, - leader_idx: usize, - leader_term: u64, - keep_alive_handle: impl Fn(Arc) -> BoxFuture<'static, CurpError> + Send + Sync + 'static, -) -> HashMap> { - let mut keep_alive_handle = Some(keep_alive_handle); - let redirect_handle = move |_id| { - Box::pin(async move { CurpError::redirect(Some(leader_idx as ServerId), leader_term) }) - as BoxFuture<'static, CurpError> - }; - (0..size) - .map(|id| MockedStreamConnectApi { - id: id as ServerId, - lease_keep_alive_handle: if id == leader_idx { - Box::new(keep_alive_handle.take().unwrap()) - } else { - Box::new(redirect_handle) - }, - }) - .enumerate() - .map(|(id, api)| (id as ServerId, Arc::new(api) as Arc)) - .collect() -} - -/// Create stream client for test -fn init_stream_client( - connects: HashMap>, - local_server: Option, - leader: Option, - term: u64, - 
cluster_version: u64, -) -> Streaming { - let state = State::new_arc(connects, local_server, leader, term, cluster_version, None); - Streaming::new(state, StreamingConfig::new(Duration::from_secs(1))) -} - -#[traced_test] -#[tokio::test] -async fn test_stream_client_keep_alive_works() { - let connects = init_mocked_stream_connects(5, 0, 1, move |client_id| { - Box::pin(async move { - client_id - .compare_exchange( - 1, - 10, - std::sync::atomic::Ordering::Relaxed, - std::sync::atomic::Ordering::Relaxed, - ) - .unwrap(); - tokio::time::sleep(Duration::from_secs(30)).await; - unreachable!("test timeout") - }) - }); - let stream = init_stream_client(connects, None, Some(0), 1, 1); - tokio::time::timeout(Duration::from_millis(100), stream.keep_heartbeat()) - .await - .unwrap_err(); - assert_eq!(stream.state.client_id(), 10); -} - -#[traced_test] -#[tokio::test] -async fn test_stream_client_keep_alive_on_redirect() { - let connects = init_mocked_stream_connects(5, 0, 2, move |client_id| { - Box::pin(async move { - client_id - .compare_exchange( - 1, - 10, - std::sync::atomic::Ordering::Relaxed, - std::sync::atomic::Ordering::Relaxed, - ) - .unwrap(); - tokio::time::sleep(Duration::from_secs(30)).await; - unreachable!("test timeout") - }) - }); - let stream = init_stream_client(connects, None, Some(1), 1, 1); - tokio::time::timeout(Duration::from_millis(100), stream.keep_heartbeat()) - .await - .unwrap_err(); - assert_eq!(stream.state.client_id(), 10); -} - -#[traced_test] -#[tokio::test] -async fn test_stream_client_keep_alive_hang_up_on_bypassed() { - let connects = init_mocked_stream_connects(5, 0, 1, |_client_id| { - Box::pin(async move { panic!("should not invoke lease_keep_alive in bypassed connection") }) - }); - let stream = init_stream_client(connects, Some(0), Some(0), 1, 1); - tokio::time::timeout(Duration::from_millis(100), stream.keep_heartbeat()) - .await - .unwrap_err(); - assert_ne!(stream.state.client_id(), 0); -} -#[traced_test] -#[tokio::test] 
-#[allow(clippy::ignored_unit_patterns)] // tokio select internal triggered -async fn test_stream_client_keep_alive_resume_on_leadership_changed() { - let connects = init_mocked_stream_connects(5, 1, 2, move |client_id| { - Box::pin(async move { - // generated a client id for bypassed client - assert_ne!(client_id.load(std::sync::atomic::Ordering::Relaxed), 0); - client_id.store(10, std::sync::atomic::Ordering::Relaxed); - tokio::time::sleep(Duration::from_secs(30)).await; - unreachable!("test timeout") - }) - }); - let stream = init_stream_client(connects, Some(0), Some(0), 1, 1); - let update_leader = async { - // wait for stream to hang up - tokio::time::sleep(Duration::from_millis(100)).await; - // check the local id - assert_ne!(stream.state.client_id(), 0); - stream.state.check_and_update_leader(Some(1), 2).await; - // wait for stream to resume - tokio::time::sleep(Duration::from_millis(100)).await; - }; - tokio::select! { - _ = stream.keep_heartbeat() => {}, - _ = update_leader => {} + #[traced_test] + #[tokio::test] + #[allow(clippy::ignored_unit_patterns)] // tokio select internal triggered + async fn test_stream_client_keep_alive_resume_on_leadership_changed() { + let connects = init_mocked_stream_connects(5, 1, 2, move |client_id| { + Box::pin(async move { + // generated a client id for bypassed client + assert_ne!(client_id.load(std::sync::atomic::Ordering::Relaxed), 0); + client_id.store(10, std::sync::atomic::Ordering::Relaxed); + tokio::time::sleep(Duration::from_secs(30)).await; + unreachable!("test timeout") + }) + }); + let stream = init_stream_client(connects, Some(0), Some(0), 1, 1); + let update_leader = async { + // wait for stream to hang up + tokio::time::sleep(Duration::from_millis(100)).await; + // check the local id + assert_ne!(stream.state.client_id(), 0); + stream.state.check_and_update_leader(Some(1), 2).await; + // wait for stream to resume + tokio::time::sleep(Duration::from_millis(100)).await; + }; + tokio::select! 
{ + _ = stream.keep_heartbeat() => {}, + _ = update_leader => {} + } + assert_eq!(stream.state.client_id(), 10); } - assert_eq!(stream.state.client_id(), 10); } diff --git a/crates/curp/src/rpc/connect.rs b/crates/curp/src/rpc/connect.rs index c62b37d31..68c8ae18d 100644 --- a/crates/curp/src/rpc/connect.rs +++ b/crates/curp/src/rpc/connect.rs @@ -2,7 +2,7 @@ use std::{ collections::{HashMap, HashSet}, fmt::{Debug, Formatter}, ops::Deref, - sync::{atomic::AtomicU64, Arc}, + sync::Arc, time::Duration, }; @@ -223,7 +223,7 @@ pub(crate) trait ConnectApi: Send + Sync + 'static { ) -> Result, CurpError>; /// Keep send lease keep alive to server and mutate the client id - async fn lease_keep_alive(&self, client_id: Arc, interval: Duration) -> CurpError; + async fn lease_keep_alive(&self, client_id: u64, interval: Duration) -> Result; } /// Inner Connect interface among different servers @@ -513,22 +513,18 @@ impl ConnectApi for Connect> { with_timeout!(timeout, client.move_leader(req)).map_err(Into::into) } - /// Keep send lease keep alive to server and mutate the client id - async fn lease_keep_alive(&self, client_id: Arc, interval: Duration) -> CurpError { + /// Keep send lease keep alive to server with the current client id + async fn lease_keep_alive(&self, client_id: u64, interval: Duration) -> Result { let mut client = self.rpc_connect.clone(); - loop { - let stream = heartbeat_stream( - client_id.load(std::sync::atomic::Ordering::Relaxed), - interval, - ); - let new_id = match client.lease_keep_alive(stream).await { - Err(err) => return err.into(), - Ok(res) => res.into_inner().client_id, - }; - // The only place to update the client id for follower - info!("client_id update to {new_id}"); - client_id.store(new_id, std::sync::atomic::Ordering::Relaxed); - } + let stream = heartbeat_stream(client_id, interval); + let new_id = client + .lease_keep_alive(stream) + .await? 
+ .into_inner() + .client_id; + // The only place to update the client id for follower + info!("client_id update to {new_id}"); + Ok(new_id) } } @@ -812,7 +808,11 @@ where } /// Keep send lease keep alive to server and mutate the client id - async fn lease_keep_alive(&self, _client_id: Arc, _interval: Duration) -> CurpError { + async fn lease_keep_alive( + &self, + _client_id: u64, + _interval: Duration, + ) -> Result { unreachable!("cannot invoke lease_keep_alive in bypassed connect") } } diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs index e392db38a..f92844234 100644 --- a/crates/curp/src/rpc/reconnect.rs +++ b/crates/curp/src/rpc/reconnect.rs @@ -1,7 +1,4 @@ -use std::{ - sync::{atomic::AtomicU64, Arc}, - time::Duration, -}; +use std::time::Duration; use async_trait::async_trait; use event_listener::Event; @@ -48,7 +45,6 @@ impl Reconnect { // Cancel the leader keep alive loop task because it hold a read lock let _cancel = self.event.notify(1); let _ignore = self.connect.write().await.replace(new_connect); - // After connection is updated, notify to start the keep alive loop let _continue = self.event.notify(1); } @@ -178,21 +174,15 @@ impl ConnectApi for Reconnect { } /// Keep send lease keep alive to server and mutate the client id - async fn lease_keep_alive(&self, client_id: Arc, interval: Duration) -> CurpError { - loop { - let connect = self.connect.read().await; - let connect_ref = connect.as_ref().unwrap(); - tokio::select! { - err = connect_ref.lease_keep_alive(Arc::clone(&client_id), interval) => { - return err; - } - _empty = self.event.listen() => {}, - } - // Creates the listener before dropping the read lock. - // This prevents us from losting the event. 
- let listener = self.event.listen(); - drop(connect); - let _connection_updated = listener.await; - } + async fn lease_keep_alive(&self, client_id: u64, interval: Duration) -> Result { + let connect = self.connect.read().await; + let connect_ref = connect.as_ref().unwrap(); + let result = tokio::select! { + result = connect_ref.lease_keep_alive(client_id, interval) => result, + _empty = self.event.listen() => Err(CurpError::RpcTransport(())), + }; + // Wait for connection update + self.event.listen().await; + result } } From 311fed498b870f145bde6bbef72d0e6473ea90b5 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 08:34:39 +0800 Subject: [PATCH 093/322] refactor: move some mutable state from `Unary` to `Retry` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 13 +-- crates/curp/src/client/retry.rs | 94 +++++++++++----- crates/curp/src/client/unary/mod.rs | 112 +++++-------------- crates/curp/src/client/unary/propose_impl.rs | 26 ++++- 4 files changed, 127 insertions(+), 118 deletions(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 6f054967a..9874886f7 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -58,7 +58,7 @@ use utils::{build_endpoint, config::ClientConfig}; use self::{ fetch::Fetch, keep_alive::KeepAlive, - retry::{Retry, RetryConfig}, + retry::{Context, Retry, RetryConfig}, state::StateBuilder, unary::{Unary, UnaryConfig}, }; @@ -182,36 +182,33 @@ impl Drop for ProposeIdGuard<'_> { /// This trait override some unrepeatable methods in ClientApi, and a client with this trait will be able to retry. #[async_trait] trait RepeatableClientApi: ClientApi { - /// Generate a unique propose id during the retry process. 
- async fn gen_propose_id(&self) -> Result, Self::Error>; - /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered /// requests (event the requests are commutative). async fn propose( &self, - propose_id: ProposeId, cmd: &Self::Cmd, token: Option<&String>, use_fast_path: bool, + ctx: Context, ) -> Result, Self::Error>; /// Send propose configuration changes to the cluster async fn propose_conf_change( &self, - propose_id: ProposeId, changes: Vec, + ctx: Context, ) -> Result, Self::Error>; /// Send propose to shutdown cluster - async fn propose_shutdown(&self, id: ProposeId) -> Result<(), Self::Error>; + async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error>; /// Send propose to publish a node id and name async fn propose_publish( &self, - propose_id: ProposeId, node_id: ServerId, node_name: String, node_client_urls: Vec, + ctx: Context, ) -> Result<(), Self::Error>; } diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 387e4dbdb..38b27acb4 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -1,4 +1,8 @@ -use std::{ops::SubAssign, sync::Arc, time::Duration}; +use std::{ + ops::SubAssign, + sync::{atomic::AtomicU64, Arc}, + time::Duration, +}; use async_trait::async_trait; use futures::Future; @@ -9,11 +13,12 @@ use super::{ cluster_state::ClusterState, fetch::Fetch, keep_alive::{KeepAlive, KeepAliveHandle}, - ClientApi, LeaderStateUpdate, ProposeResponse, RepeatableClientApi, + ClientApi, LeaderStateUpdate, ProposeIdGuard, ProposeResponse, RepeatableClientApi, }; use crate::{ members::ServerId, - rpc::{ConfChange, CurpError, FetchClusterResponse, Member, ReadState}, + rpc::{ConfChange, CurpError, FetchClusterResponse, Member, ProposeId, ReadState}, + tracker::Tracker, }; /// Backoff config @@ -103,24 +108,36 @@ impl Backoff { /// The context of a retry #[derive(Debug)] pub(crate) struct Context { - /// The current client id - client_id: u64, 
+ /// The propose id + propose_id: ProposeId, + /// First incomplete seqence + first_incomplete: u64, /// The current cluster state cluster_state: ClusterState, } impl Context { /// Creates a new `Context` - pub(crate) fn new(client_id: u64, cluster_state: ClusterState) -> Self { + pub(crate) fn new( + propose_id: ProposeId, + first_incomplete: u64, + cluster_state: ClusterState, + ) -> Self { Self { - client_id, + propose_id, + first_incomplete, cluster_state, } } - /// Returns the current client id - pub(crate) fn client_id(&self) -> u64 { - self.client_id + /// Returns the current propose id + pub(crate) fn propose_id(&self) -> ProposeId { + self.propose_id + } + + /// Returns the first incomplete sequence number + pub(crate) fn first_incomplete(&self) -> u64 { + self.first_incomplete } /// Returns the current client id @@ -129,6 +146,34 @@ impl Context { } } +/// Command tracker +#[derive(Debug, Default)] +struct CmdTracker { + /// Last sent sequence number + last_sent_seq: AtomicU64, + /// Request tracker + tracker: RwLock, +} + +impl CmdTracker { + /// New a seq num and record it + fn new_seq_num(&self) -> u64 { + self.last_sent_seq + .fetch_add(1, std::sync::atomic::Ordering::Relaxed) + } + + /// Generate a unique propose id during the retry process. + fn gen_propose_id(&self, client_id: u64) -> ProposeIdGuard<'_> { + let seq_num = self.new_seq_num(); + ProposeIdGuard::new(&self.tracker, ProposeId(client_id, seq_num)) + } + + /// Generate a unique propose id during the retry process. 
+ fn first_incomplete(&self) -> u64 { + self.tracker.read().first_incomplete() + } +} + /// The retry client automatically retry the requests of the inner client api /// which raises the [`tonic::Status`] error #[derive(Debug)] @@ -143,6 +188,8 @@ pub(super) struct Retry { keep_alive: KeepAliveHandle, /// Fetch cluster object fetch: Fetch, + /// Command tracker + tracker: CmdTracker, } impl Retry @@ -165,6 +212,7 @@ where cluster_state, keep_alive: keep_alive_handle, fetch, + tracker: CmdTracker::default(), } } @@ -179,9 +227,11 @@ where let mut backoff = self.retry_config.init_backoff(); let mut last_err = None; let client_id = self.keep_alive.wait_id_update(0).await; + let propose_id_guard = self.tracker.gen_propose_id(client_id); + let first_incomplete = self.tracker.first_incomplete(); while let Some(delay) = backoff.next_delay() { let cluster_state = self.cluster_state.read().clone(); - let context = Context::new(client_id, cluster_state.clone()); + let context = Context::new(*propose_id_guard, first_incomplete, cluster_state.clone()); let result = tokio::select! 
{ result = f(&self.inner, context) => result, _ = self.keep_alive.wait_id_update(client_id) => { @@ -268,9 +318,8 @@ where token: Option<&String>, use_fast_path: bool, ) -> Result, tonic::Status> { - self.retry::<_, _>(|client, _ctx| async move { - let propose_id = self.inner.gen_propose_id().await?; - RepeatableClientApi::propose(client, *propose_id, cmd, token, use_fast_path).await + self.retry::<_, _>(|client, ctx| async move { + RepeatableClientApi::propose(client, cmd, token, use_fast_path, ctx).await }) .await } @@ -280,21 +329,17 @@ where &self, changes: Vec, ) -> Result, tonic::Status> { - self.retry::<_, _>(|client, _ctx| { + self.retry::<_, _>(|client, ctx| { let changes_c = changes.clone(); - async move { - let propose_id = self.inner.gen_propose_id().await?; - RepeatableClientApi::propose_conf_change(client, *propose_id, changes_c).await - } + async move { RepeatableClientApi::propose_conf_change(client, changes_c, ctx).await } }) .await } /// Send propose to shutdown cluster async fn propose_shutdown(&self) -> Result<(), tonic::Status> { - self.retry::<_, _>(|client, _ctx| async move { - let propose_id = self.inner.gen_propose_id().await?; - RepeatableClientApi::propose_shutdown(client, *propose_id).await + self.retry::<_, _>(|client, ctx| async move { + RepeatableClientApi::propose_shutdown(client, ctx).await }) .await } @@ -306,17 +351,16 @@ where node_name: String, node_client_urls: Vec, ) -> Result<(), Self::Error> { - self.retry::<_, _>(|client, _ctx| { + self.retry::<_, _>(|client, ctx| { let name_c = node_name.clone(); let node_client_urls_c = node_client_urls.clone(); async move { - let propose_id = self.inner.gen_propose_id().await?; RepeatableClientApi::propose_publish( client, - *propose_id, node_id, name_c, node_client_urls_c, + ctx, ) .await } diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 41f510e80..dbfac93d6 100644 --- a/crates/curp/src/client/unary/mod.rs +++ 
b/crates/curp/src/client/unary/mod.rs @@ -1,33 +1,26 @@ /// Client propose implementation mod propose_impl; -use std::{ - cmp::Ordering, - marker::PhantomData, - sync::{atomic::AtomicU64, Arc}, - time::Duration, -}; +use std::{cmp::Ordering, marker::PhantomData, sync::Arc, time::Duration}; use async_trait::async_trait; use curp_external_api::cmd::Command; use futures::{Future, StreamExt}; -use parking_lot::RwLock; use tonic::Response; use tracing::{debug, warn}; use super::{ - cluster_state::ClusterState, config::Config, state::State, ClientApi, LeaderStateUpdate, - ProposeIdGuard, ProposeResponse, RepeatableClientApi, + retry::Context, state::State, ClientApi, LeaderStateUpdate, ProposeResponse, + RepeatableClientApi, }; use crate::{ members::ServerId, quorum, rpc::{ connect::ConnectApi, ConfChange, CurpError, FetchClusterRequest, FetchClusterResponse, - FetchReadStateRequest, Member, MoveLeaderRequest, ProposeConfChangeRequest, ProposeId, - PublishRequest, ReadState, ShutdownRequest, + FetchReadStateRequest, Member, MoveLeaderRequest, ProposeConfChangeRequest, PublishRequest, + ReadState, ShutdownRequest, }, - tracker::Tracker, }; /// The unary client config @@ -58,19 +51,8 @@ pub(super) struct Unary { state: Arc, /// Unary config config: UnaryConfig, - /// Request tracker - tracker: RwLock, - /// Last sent sequence number - last_sent_seq: AtomicU64, /// marker phantom: PhantomData, - - #[allow(dead_code)] - /// Cluster state - cluster_state: RwLock, - #[allow(dead_code)] - /// Cluster state - client_config: Config, } impl Unary { @@ -79,13 +61,7 @@ impl Unary { Self { state, config, - tracker: RwLock::new(Tracker::default()), - last_sent_seq: AtomicU64::new(0), phantom: PhantomData, - - // TODO: build cluster state - cluster_state: RwLock::default(), - client_config: Config::default(), } } @@ -120,13 +96,6 @@ impl Unary { None => as ClientApi>::fetch_leader_id(self, false).await, } } - - /// New a seq num and record it - #[allow(clippy::unused_self)] // TODO: 
implement request tracker - fn new_seq_num(&self) -> u64 { - self.last_sent_seq - .fetch_add(1, std::sync::atomic::Ordering::Relaxed) - } } #[async_trait] @@ -141,45 +110,34 @@ impl ClientApi for Unary { /// requests (event the requests are commutative). async fn propose( &self, - cmd: &C, - token: Option<&String>, - use_fast_path: bool, + _cmd: &C, + _token: Option<&String>, + _use_fast_path: bool, ) -> Result, CurpError> { - let propose_id = self.gen_propose_id().await?; - RepeatableClientApi::propose(self, *propose_id, cmd, token, use_fast_path).await + unimplemented!("please use `Retry>`"); } /// Send propose configuration changes to the cluster async fn propose_conf_change( &self, - changes: Vec, + _changes: Vec, ) -> Result, CurpError> { - let propose_id = self.gen_propose_id().await?; - RepeatableClientApi::propose_conf_change(self, *propose_id, changes).await + unimplemented!("please use `Retry>`"); } /// Send propose to shutdown cluster async fn propose_shutdown(&self) -> Result<(), CurpError> { - let propose_id = self.gen_propose_id().await?; - RepeatableClientApi::propose_shutdown(self, *propose_id).await + unimplemented!("please use `Retry>`"); } /// Send propose to publish a node id and name async fn propose_publish( &self, - node_id: ServerId, - node_name: String, - node_client_urls: Vec, + _node_id: ServerId, + _node_name: String, + _node_client_urls: Vec, ) -> Result<(), Self::Error> { - let propose_id = self.gen_propose_id().await?; - RepeatableClientApi::propose_publish( - self, - *propose_id, - node_id, - node_name, - node_client_urls, - ) - .await + unimplemented!("please use `Retry>`"); } /// Send move leader request @@ -316,33 +274,22 @@ impl ClientApi for Unary { #[async_trait] impl RepeatableClientApi for Unary { - /// Generate a unique propose id during the retry process. 
- async fn gen_propose_id(&self) -> Result, Self::Error> { - let mut client_id = self.state.client_id(); - if client_id == 0 { - client_id = self.state.wait_for_client_id().await?; - }; - let seq_num = self.new_seq_num(); - Ok(ProposeIdGuard::new( - &self.tracker, - ProposeId(client_id, seq_num), - )) - } - /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered /// requests (event the requests are commutative). async fn propose( &self, - propose_id: ProposeId, cmd: &Self::Cmd, token: Option<&String>, use_fast_path: bool, + ctx: Context, ) -> Result, Self::Error> { + let propose_id = ctx.propose_id(); + let first_incomplete = ctx.first_incomplete(); if cmd.is_read_only() { - self.propose_read_only(cmd, propose_id, token, use_fast_path) + self.propose_read_only(cmd, propose_id, token, use_fast_path, first_incomplete) .await } else { - self.propose_mutative(cmd, propose_id, token, use_fast_path) + self.propose_mutative(cmd, propose_id, token, first_incomplete, use_fast_path) .await } } @@ -350,11 +297,14 @@ impl RepeatableClientApi for Unary { /// Send propose configuration changes to the cluster async fn propose_conf_change( &self, - propose_id: ProposeId, changes: Vec, + ctx: Context, ) -> Result, Self::Error> { - let req = - ProposeConfChangeRequest::new(propose_id, changes, self.state.cluster_version().await); + let req = ProposeConfChangeRequest::new( + ctx.propose_id(), + changes, + self.state.cluster_version().await, + ); let timeout = self.config.wait_synced_timeout; let members = self .map_leader(|conn| async move { conn.propose_conf_change(req, timeout).await }) @@ -365,8 +315,8 @@ impl RepeatableClientApi for Unary { } /// Send propose to shutdown cluster - async fn propose_shutdown(&self, propose_id: ProposeId) -> Result<(), Self::Error> { - let req = ShutdownRequest::new(propose_id, self.state.cluster_version().await); + async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error> { + let req = 
ShutdownRequest::new(ctx.propose_id(), self.state.cluster_version().await); let timeout = self.config.wait_synced_timeout; let _ig = self .map_leader(|conn| async move { conn.shutdown(req, timeout).await }) @@ -377,12 +327,12 @@ impl RepeatableClientApi for Unary { /// Send propose to publish a node id and name async fn propose_publish( &self, - propose_id: ProposeId, node_id: ServerId, node_name: String, node_client_urls: Vec, + ctx: Context, ) -> Result<(), Self::Error> { - let req = PublishRequest::new(propose_id, node_id, node_name, node_client_urls); + let req = PublishRequest::new(ctx.propose_id(), node_id, node_name, node_client_urls); let timeout = self.config.wait_synced_timeout; let _ig = self .map_leader(|conn| async move { conn.publish(req, timeout).await }) diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs index d70033d9d..ea7aeb077 100644 --- a/crates/curp/src/client/unary/propose_impl.rs +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -44,10 +44,11 @@ impl Unary { cmd: &C, propose_id: ProposeId, token: Option<&String>, + first_incomplete: u64, use_fast_path: bool, ) -> Result, CurpError> { let stream = self - .send_propose_mutative(cmd, propose_id, use_fast_path, token) + .send_propose_mutative(cmd, propose_id, use_fast_path, first_incomplete, token) .await?; let mut stream = Box::into_pin(stream); let first_two_events = ( @@ -84,10 +85,18 @@ impl Unary { propose_id: ProposeId, token: Option<&String>, use_fast_path: bool, + first_incomplete: u64, ) -> Result, CurpError> { let leader_id = self.leader_id().await?; let stream = self - .send_leader_propose(cmd, leader_id, propose_id, use_fast_path, token) + .send_leader_propose( + cmd, + leader_id, + propose_id, + use_fast_path, + first_incomplete, + token, + ) .await?; let mut stream_pinned = Box::into_pin(stream); if !self.send_read_index(leader_id).await { @@ -125,11 +134,19 @@ impl Unary { cmd: &C, propose_id: ProposeId, use_fast_path: 
bool, + first_incomplete: u64, token: Option<&String>, ) -> Result, CurpError> { let leader_id = self.leader_id().await?; let leader_stream = self - .send_leader_propose(cmd, leader_id, propose_id, use_fast_path, token) + .send_leader_propose( + cmd, + leader_id, + propose_id, + use_fast_path, + first_incomplete, + token, + ) .await?; let follower_stream = self.send_record(cmd, leader_id, propose_id).await; let select = stream::select(Box::into_pin(leader_stream), Box::into_pin(follower_stream)); @@ -144,6 +161,7 @@ impl Unary { leader_id: ServerId, propose_id: ProposeId, use_fast_path: bool, + first_incomplete: u64, token: Option<&String>, ) -> Result, CurpError> { let term = self.state.term().await; @@ -153,7 +171,7 @@ impl Unary { self.state.cluster_version().await, term, !use_fast_path, - self.tracker.read().first_incomplete(), + first_incomplete, ); let timeout = self.config.propose_timeout; let token = token.cloned(); From 512d8782e8c52aca6f86cc120c0fbf8b3c930de5 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 09:05:11 +0800 Subject: [PATCH 094/322] chore: remove unused `LeaderStateUpdate` trait and impls Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 7 ------- crates/curp/src/client/retry.rs | 6 +++--- crates/curp/src/client/unary/mod.rs | 13 +------------ 3 files changed, 4 insertions(+), 22 deletions(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 9874886f7..7927ce65d 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -212,13 +212,6 @@ trait RepeatableClientApi: ClientApi { ) -> Result<(), Self::Error>; } -/// Update leader state -#[async_trait] -trait LeaderStateUpdate { - /// update - async fn update_leader(&self, leader_id: Option, term: u64) -> bool; -} - /// Client builder to build a client #[derive(Debug, Clone, Default)] #[allow(clippy::module_name_repetitions)] // better than 
just Builder diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 38b27acb4..1ebde0f19 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -13,7 +13,7 @@ use super::{ cluster_state::ClusterState, fetch::Fetch, keep_alive::{KeepAlive, KeepAliveHandle}, - ClientApi, LeaderStateUpdate, ProposeIdGuard, ProposeResponse, RepeatableClientApi, + ClientApi, ProposeIdGuard, ProposeResponse, RepeatableClientApi, }; use crate::{ members::ServerId, @@ -194,7 +194,7 @@ pub(super) struct Retry { impl Retry where - Api: RepeatableClientApi + LeaderStateUpdate + Send + Sync + 'static, + Api: RepeatableClientApi + Send + Sync + 'static, { /// Create a retry client pub(super) fn new( @@ -302,7 +302,7 @@ where #[async_trait] impl ClientApi for Retry where - Api: RepeatableClientApi + LeaderStateUpdate + Send + Sync + 'static, + Api: RepeatableClientApi + Send + Sync + 'static, { /// The client error type Error = tonic::Status; diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index dbfac93d6..bc6fb5396 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -9,10 +9,7 @@ use futures::{Future, StreamExt}; use tonic::Response; use tracing::{debug, warn}; -use super::{ - retry::Context, state::State, ClientApi, LeaderStateUpdate, ProposeResponse, - RepeatableClientApi, -}; +use super::{retry::Context, state::State, ClientApi, ProposeResponse, RepeatableClientApi}; use crate::{ members::ServerId, quorum, @@ -340,11 +337,3 @@ impl RepeatableClientApi for Unary { Ok(()) } } - -#[async_trait] -impl LeaderStateUpdate for Unary { - /// Update leader - async fn update_leader(&self, leader_id: Option, term: u64) -> bool { - self.state.check_and_update_leader(leader_id, term).await - } -} From eb2b13fad2a4d0b1db984cb981b18bd5626de949 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 09:18:35 +0800 Subject: 
[PATCH 095/322] refactor: move `ClientApi` implementation to `RepeatableClientApi` for `Unary` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 31 +++++- crates/curp/src/client/retry.rs | 6 +- crates/curp/src/client/unary/mod.rs | 149 ++++++++++++---------------- 3 files changed, 93 insertions(+), 93 deletions(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 7927ce65d..7fa44b715 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -36,8 +36,9 @@ mod config; /// Lease keep alive implementation mod keep_alive; +// TODO: rewrite these tests /// Tests for client -#[cfg(test)] +#[cfg(ignore)] mod tests; #[cfg(madsim)] @@ -181,7 +182,13 @@ impl Drop for ProposeIdGuard<'_> { /// This trait override some unrepeatable methods in ClientApi, and a client with this trait will be able to retry. #[async_trait] -trait RepeatableClientApi: ClientApi { +trait RepeatableClientApi { + /// The client error + type Error; + + /// The command type + type Cmd: Command; + /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered /// requests (event the requests are commutative). async fn propose( @@ -210,6 +217,26 @@ trait RepeatableClientApi: ClientApi { node_client_urls: Vec, ctx: Context, ) -> Result<(), Self::Error>; + + /// Send move leader request + async fn move_leader(&self, node_id: u64, ctx: Context) -> Result<(), Self::Error>; + + /// Send fetch read state from leader + async fn fetch_read_state( + &self, + cmd: &Self::Cmd, + ctx: Context, + ) -> Result; + + /// Send fetch cluster requests to all servers (That's because initially, we didn't + /// know who the leader is.) 
+ /// + /// Note: The fetched cluster may still be outdated if `linearizable` is false + async fn fetch_cluster( + &self, + linearizable: bool, + ctx: Context, + ) -> Result; } /// Client builder to build a client diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 1ebde0f19..56d991315 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -370,13 +370,13 @@ where /// Send move leader request async fn move_leader(&self, node_id: u64) -> Result<(), Self::Error> { - self.retry::<_, _>(|client, _ctx| client.move_leader(node_id)) + self.retry::<_, _>(|client, ctx| client.move_leader(node_id, ctx)) .await } /// Send fetch read state from leader async fn fetch_read_state(&self, cmd: &Self::Cmd) -> Result { - self.retry::<_, _>(|client, _ctx| client.fetch_read_state(cmd)) + self.retry::<_, _>(|client, ctx| client.fetch_read_state(cmd, ctx)) .await } @@ -388,7 +388,7 @@ where &self, linearizable: bool, ) -> Result { - self.retry::<_, _>(|client, _ctx| client.fetch_cluster(linearizable)) + self.retry::<_, _>(|client, ctx| client.fetch_cluster(linearizable, ctx)) .await } } diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index bc6fb5396..7bb71f647 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -96,7 +96,7 @@ impl Unary { } #[async_trait] -impl ClientApi for Unary { +impl RepeatableClientApi for Unary { /// The error is generated from server type Error = CurpError; @@ -107,38 +107,70 @@ impl ClientApi for Unary { /// requests (event the requests are commutative). 
async fn propose( &self, - _cmd: &C, - _token: Option<&String>, - _use_fast_path: bool, - ) -> Result, CurpError> { - unimplemented!("please use `Retry>`"); + cmd: &Self::Cmd, + token: Option<&String>, + use_fast_path: bool, + ctx: Context, + ) -> Result, Self::Error> { + let propose_id = ctx.propose_id(); + let first_incomplete = ctx.first_incomplete(); + if cmd.is_read_only() { + self.propose_read_only(cmd, propose_id, token, use_fast_path, first_incomplete) + .await + } else { + self.propose_mutative(cmd, propose_id, token, first_incomplete, use_fast_path) + .await + } } /// Send propose configuration changes to the cluster async fn propose_conf_change( &self, - _changes: Vec, - ) -> Result, CurpError> { - unimplemented!("please use `Retry>`"); + changes: Vec, + ctx: Context, + ) -> Result, Self::Error> { + let req = ProposeConfChangeRequest::new( + ctx.propose_id(), + changes, + self.state.cluster_version().await, + ); + let timeout = self.config.wait_synced_timeout; + let members = self + .map_leader(|conn| async move { conn.propose_conf_change(req, timeout).await }) + .await? 
+ .into_inner() + .members; + Ok(members) } /// Send propose to shutdown cluster - async fn propose_shutdown(&self) -> Result<(), CurpError> { - unimplemented!("please use `Retry>`"); + async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error> { + let req = ShutdownRequest::new(ctx.propose_id(), self.state.cluster_version().await); + let timeout = self.config.wait_synced_timeout; + let _ig = self + .map_leader(|conn| async move { conn.shutdown(req, timeout).await }) + .await?; + Ok(()) } /// Send propose to publish a node id and name async fn propose_publish( &self, - _node_id: ServerId, - _node_name: String, - _node_client_urls: Vec, + node_id: ServerId, + node_name: String, + node_client_urls: Vec, + ctx: Context, ) -> Result<(), Self::Error> { - unimplemented!("please use `Retry>`"); + let req = PublishRequest::new(ctx.propose_id(), node_id, node_name, node_client_urls); + let timeout = self.config.wait_synced_timeout; + let _ig = self + .map_leader(|conn| async move { conn.publish(req, timeout).await }) + .await?; + Ok(()) } /// Send move leader request - async fn move_leader(&self, node_id: ServerId) -> Result<(), Self::Error> { + async fn move_leader(&self, node_id: u64, ctx: Context) -> Result<(), Self::Error> { let req = MoveLeaderRequest::new(node_id, self.state.cluster_version().await); let timeout = self.config.wait_synced_timeout; let _ig = self @@ -148,7 +180,11 @@ impl ClientApi for Unary { } /// Send fetch read state from leader - async fn fetch_read_state(&self, cmd: &C) -> Result { + async fn fetch_read_state( + &self, + cmd: &Self::Cmd, + ctx: Context, + ) -> Result { // Same as fast_round, we blame the serializing error to the server even // thought it is the local error let req = FetchReadStateRequest::new(cmd, self.state.cluster_version().await).map_err( @@ -167,9 +203,15 @@ impl ClientApi for Unary { Ok(state) } - /// Send fetch cluster requests to all servers + /// Send fetch cluster requests to all servers (That's because 
initially, we didn't + /// know who the leader is.) + /// /// Note: The fetched cluster may still be outdated if `linearizable` is false - async fn fetch_cluster(&self, linearizable: bool) -> Result { + async fn fetch_cluster( + &self, + linearizable: bool, + ctx: Context, + ) -> Result { let timeout = self.config.wait_synced_timeout; if !linearizable { // firstly, try to fetch the local server @@ -268,72 +310,3 @@ impl ClientApi for Unary { return Err(CurpError::RpcTransport(())); } } - -#[async_trait] -impl RepeatableClientApi for Unary { - /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered - /// requests (event the requests are commutative). - async fn propose( - &self, - cmd: &Self::Cmd, - token: Option<&String>, - use_fast_path: bool, - ctx: Context, - ) -> Result, Self::Error> { - let propose_id = ctx.propose_id(); - let first_incomplete = ctx.first_incomplete(); - if cmd.is_read_only() { - self.propose_read_only(cmd, propose_id, token, use_fast_path, first_incomplete) - .await - } else { - self.propose_mutative(cmd, propose_id, token, first_incomplete, use_fast_path) - .await - } - } - - /// Send propose configuration changes to the cluster - async fn propose_conf_change( - &self, - changes: Vec, - ctx: Context, - ) -> Result, Self::Error> { - let req = ProposeConfChangeRequest::new( - ctx.propose_id(), - changes, - self.state.cluster_version().await, - ); - let timeout = self.config.wait_synced_timeout; - let members = self - .map_leader(|conn| async move { conn.propose_conf_change(req, timeout).await }) - .await? 
- .into_inner() - .members; - Ok(members) - } - - /// Send propose to shutdown cluster - async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error> { - let req = ShutdownRequest::new(ctx.propose_id(), self.state.cluster_version().await); - let timeout = self.config.wait_synced_timeout; - let _ig = self - .map_leader(|conn| async move { conn.shutdown(req, timeout).await }) - .await?; - Ok(()) - } - - /// Send propose to publish a node id and name - async fn propose_publish( - &self, - node_id: ServerId, - node_name: String, - node_client_urls: Vec, - ctx: Context, - ) -> Result<(), Self::Error> { - let req = PublishRequest::new(ctx.propose_id(), node_id, node_name, node_client_urls); - let timeout = self.config.wait_synced_timeout; - let _ig = self - .map_leader(|conn| async move { conn.publish(req, timeout).await }) - .await?; - Ok(()) - } -} From 5dfbc2e9d9c6119f6c5c0d1ff2863e17c6cec3d1 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 09:22:30 +0800 Subject: [PATCH 096/322] refactor: use cluster state in context for RepeatableClientApi implementation Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary/mod.rs | 64 ++++++++++------------------- 1 file changed, 21 insertions(+), 43 deletions(-) diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 7bb71f647..d2c9ae7da 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -5,18 +5,18 @@ use std::{cmp::Ordering, marker::PhantomData, sync::Arc, time::Duration}; use async_trait::async_trait; use curp_external_api::cmd::Command; -use futures::{Future, StreamExt}; +use futures::StreamExt; use tonic::Response; use tracing::{debug, warn}; -use super::{retry::Context, state::State, ClientApi, ProposeResponse, RepeatableClientApi}; +use super::{retry::Context, state::State, ProposeResponse, RepeatableClientApi}; use crate::{ 
members::ServerId, quorum, rpc::{ - connect::ConnectApi, ConfChange, CurpError, FetchClusterRequest, FetchClusterResponse, - FetchReadStateRequest, Member, MoveLeaderRequest, ProposeConfChangeRequest, PublishRequest, - ReadState, ShutdownRequest, + ConfChange, CurpError, FetchClusterRequest, FetchClusterResponse, FetchReadStateRequest, + Member, MoveLeaderRequest, ProposeConfChangeRequest, PublishRequest, ReadState, + ShutdownRequest, }, }; @@ -61,38 +61,6 @@ impl Unary { phantom: PhantomData, } } - - /// Get a handle `f` and apply to the leader - /// - /// NOTICE: - /// - /// The leader might be outdate if the local state is stale. - /// - /// `map_leader` should never be invoked in [`ClientApi::fetch_cluster`] - /// - /// `map_leader` might call `fetch_leader_id`, `fetch_cluster`, finally - /// result in stack overflow. - async fn map_leader>>( - &self, - f: impl FnOnce(Arc) -> F, - ) -> Result { - let cached_leader = self.state.leader_id().await; - let leader_id = match cached_leader { - Some(id) => id, - None => as ClientApi>::fetch_leader_id(self, false).await?, - }; - - self.state.map_server(leader_id, f).await - } - - /// Gets the leader id - async fn leader_id(&self) -> Result { - let cached_leader = self.state.leader_id().await; - match cached_leader { - Some(id) => Ok(id), - None => as ClientApi>::fetch_leader_id(self, false).await, - } - } } #[async_trait] @@ -135,11 +103,13 @@ impl RepeatableClientApi for Unary { self.state.cluster_version().await, ); let timeout = self.config.wait_synced_timeout; - let members = self + let members = ctx + .cluster_state() .map_leader(|conn| async move { conn.propose_conf_change(req, timeout).await }) .await? 
.into_inner() .members; + Ok(members) } @@ -147,9 +117,11 @@ impl RepeatableClientApi for Unary { async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error> { let req = ShutdownRequest::new(ctx.propose_id(), self.state.cluster_version().await); let timeout = self.config.wait_synced_timeout; - let _ig = self + let _resp = ctx + .cluster_state() .map_leader(|conn| async move { conn.shutdown(req, timeout).await }) .await?; + Ok(()) } @@ -163,9 +135,11 @@ impl RepeatableClientApi for Unary { ) -> Result<(), Self::Error> { let req = PublishRequest::new(ctx.propose_id(), node_id, node_name, node_client_urls); let timeout = self.config.wait_synced_timeout; - let _ig = self + let _resp = ctx + .cluster_state() .map_leader(|conn| async move { conn.publish(req, timeout).await }) .await?; + Ok(()) } @@ -173,9 +147,11 @@ impl RepeatableClientApi for Unary { async fn move_leader(&self, node_id: u64, ctx: Context) -> Result<(), Self::Error> { let req = MoveLeaderRequest::new(node_id, self.state.cluster_version().await); let timeout = self.config.wait_synced_timeout; - let _ig = self + let _resp = ctx + .cluster_state() .map_leader(|conn| async move { conn.move_leader(req, timeout).await }) .await?; + Ok(()) } @@ -194,12 +170,14 @@ impl RepeatableClientApi for Unary { }, )?; let timeout = self.config.wait_synced_timeout; - let state = self + let state = ctx + .cluster_state() .map_leader(|conn| async move { conn.fetch_read_state(req, timeout).await }) .await? 
.into_inner() .read_state .unwrap_or_else(|| unreachable!("read_state must be set in fetch read state response")); + Ok(state) } @@ -210,7 +188,7 @@ impl RepeatableClientApi for Unary { async fn fetch_cluster( &self, linearizable: bool, - ctx: Context, + _ctx: Context, ) -> Result { let timeout = self.config.wait_synced_timeout; if !linearizable { From 0c54251805b1047a912ea1b0d1a4053fa35f20c9 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 09:33:12 +0800 Subject: [PATCH 097/322] refactor: use cluster_state in Context for propose impl Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary/mod.rs | 7 +- crates/curp/src/client/unary/propose_impl.rs | 80 ++++++-------------- 2 files changed, 26 insertions(+), 61 deletions(-) diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index d2c9ae7da..0769cf73f 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -80,14 +80,11 @@ impl RepeatableClientApi for Unary { use_fast_path: bool, ctx: Context, ) -> Result, Self::Error> { - let propose_id = ctx.propose_id(); - let first_incomplete = ctx.first_incomplete(); if cmd.is_read_only() { - self.propose_read_only(cmd, propose_id, token, use_fast_path, first_incomplete) + self.propose_read_only(cmd, token, use_fast_path, &ctx) .await } else { - self.propose_mutative(cmd, propose_id, token, first_incomplete, use_fast_path) - .await + self.propose_mutative(cmd, token, use_fast_path, &ctx).await } } diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs index ea7aeb077..02a59b468 100644 --- a/crates/curp/src/client/unary/propose_impl.rs +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -4,10 +4,9 @@ use curp_external_api::cmd::Command; use futures::{future, stream, FutureExt, Stream, StreamExt}; use crate::{ - client::ProposeResponse, - members::ServerId, + 
client::{retry::Context, ProposeResponse}, quorum, - rpc::{CurpError, OpResponse, ProposeId, ProposeRequest, RecordRequest, ResponseOp}, + rpc::{CurpError, OpResponse, ProposeRequest, RecordRequest, ResponseOp}, super_quorum, }; @@ -42,13 +41,12 @@ impl Unary { pub(super) async fn propose_mutative( &self, cmd: &C, - propose_id: ProposeId, token: Option<&String>, - first_incomplete: u64, use_fast_path: bool, + ctx: &Context, ) -> Result, CurpError> { let stream = self - .send_propose_mutative(cmd, propose_id, use_fast_path, first_incomplete, token) + .send_propose_mutative(cmd, use_fast_path, token, ctx) .await?; let mut stream = Box::into_pin(stream); let first_two_events = ( @@ -82,24 +80,15 @@ impl Unary { pub(super) async fn propose_read_only( &self, cmd: &C, - propose_id: ProposeId, token: Option<&String>, use_fast_path: bool, - first_incomplete: u64, + ctx: &Context, ) -> Result, CurpError> { - let leader_id = self.leader_id().await?; let stream = self - .send_leader_propose( - cmd, - leader_id, - propose_id, - use_fast_path, - first_incomplete, - token, - ) + .send_leader_propose(cmd, use_fast_path, token, ctx) .await?; let mut stream_pinned = Box::into_pin(stream); - if !self.send_read_index(leader_id).await { + if !self.send_read_index(ctx).await { return Err(CurpError::WrongClusterVersion(())); } if use_fast_path { @@ -132,23 +121,14 @@ impl Unary { async fn send_propose_mutative( &self, cmd: &C, - propose_id: ProposeId, use_fast_path: bool, - first_incomplete: u64, token: Option<&String>, + ctx: &Context, ) -> Result, CurpError> { - let leader_id = self.leader_id().await?; let leader_stream = self - .send_leader_propose( - cmd, - leader_id, - propose_id, - use_fast_path, - first_incomplete, - token, - ) + .send_leader_propose(cmd, use_fast_path, token, ctx) .await?; - let follower_stream = self.send_record(cmd, leader_id, propose_id).await; + let follower_stream = self.send_record(cmd, ctx).await; let select = stream::select(Box::into_pin(leader_stream), 
Box::into_pin(follower_stream)); Ok(Box::new(select)) @@ -158,26 +138,24 @@ impl Unary { async fn send_leader_propose( &self, cmd: &C, - leader_id: ServerId, - propose_id: ProposeId, use_fast_path: bool, - first_incomplete: u64, token: Option<&String>, + ctx: &Context, ) -> Result, CurpError> { let term = self.state.term().await; let propose_req = ProposeRequest::new::( - propose_id, + ctx.propose_id(), cmd, self.state.cluster_version().await, term, !use_fast_path, - first_incomplete, + ctx.first_incomplete(), ); let timeout = self.config.propose_timeout; let token = token.cloned(); - let stream = self - .state - .map_server(leader_id, move |conn| async move { + let stream = ctx + .cluster_state() + .map_leader(move |conn| async move { conn.propose_stream(propose_req, token, timeout).await }) .map(Self::flatten_propose_stream_result) @@ -190,19 +168,15 @@ impl Unary { /// Send read index requests to the cluster /// /// Returns `true` if the read index is successful - async fn send_read_index(&self, leader_id: ServerId) -> bool { + async fn send_read_index(&self, ctx: &Context) -> bool { let term = self.state.term().await; let connects_len = self.state.connects_len().await; let quorum = quorum(connects_len); let expect = quorum.wrapping_sub(1); let timeout = self.config.propose_timeout; - self.state - .for_each_follower( - leader_id, - |conn| async move { conn.read_index(timeout).await }, - ) - .await + ctx.cluster_state() + .for_each_follower(|conn| async move { conn.read_index(timeout).await }) .filter_map(|res| future::ready(res.ok())) .filter(|resp| future::ready(resp.get_ref().term == term)) .take(expect) @@ -214,24 +188,18 @@ impl Unary { /// Send record requests to the cluster /// /// Returns a stream that yield a single event - async fn send_record( - &self, - cmd: &C, - leader_id: ServerId, - propose_id: ProposeId, - ) -> EventStream<'_, C> { + async fn send_record(&self, cmd: &C, ctx: &Context) -> EventStream<'_, C> { let connects_len = 
self.state.connects_len().await; let superquorum = super_quorum(connects_len); let timeout = self.config.propose_timeout; - let record_req = RecordRequest::new::(propose_id, cmd); + let record_req = RecordRequest::new::(ctx.propose_id(), cmd); let expect = superquorum.wrapping_sub(1); - let stream = self - .state - .for_each_follower(leader_id, |conn| { + let stream = ctx + .cluster_state() + .for_each_follower(|conn| { let record_req_c = record_req.clone(); async move { conn.record(record_req_c, timeout).await } }) - .await .filter_map(|res| future::ready(res.ok())) .filter(|resp| future::ready(!resp.get_ref().conflict)) .take(expect) From 852186227165c588bab09a74ca9fdb94da365c39 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 10:09:16 +0800 Subject: [PATCH 098/322] refactor: remove `State` from `Unary`, use `Context` instead Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 5 +++ crates/curp/src/client/mod.rs | 22 ++++++-------- crates/curp/src/client/unary/mod.rs | 32 +++++++++----------- crates/curp/src/client/unary/propose_impl.rs | 18 +++++------ 4 files changed, 37 insertions(+), 40 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index cdfe72b47..b3150c12e 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -110,4 +110,9 @@ impl ClusterState { pub(crate) fn term(&self) -> u64 { self.term } + + /// Returns the cluster version + pub(crate) fn cluster_version(&self) -> u64 { + self.cluster_version + } } diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 7fa44b715..e728a0013 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -43,7 +43,7 @@ mod tests; #[cfg(madsim)] use std::sync::atomic::AtomicU64; -use std::{collections::HashMap, fmt::Debug, ops::Deref, sync::Arc, time::Duration}; 
+use std::{collections::HashMap, fmt::Debug, ops::Deref, time::Duration}; use async_trait::async_trait; use curp_external_api::cmd::Command; @@ -57,11 +57,12 @@ use utils::ClientTlsConfig; use utils::{build_endpoint, config::ClientConfig}; use self::{ + config::Config, fetch::Fetch, keep_alive::KeepAlive, retry::{Context, Retry, RetryConfig}, state::StateBuilder, - unary::{Unary, UnaryConfig}, + unary::Unary, }; use crate::{ members::ServerId, @@ -442,10 +443,13 @@ impl ClientBuilder { } /// Init unary config - fn init_unary_config(&self) -> UnaryConfig { - UnaryConfig::new( + fn init_unary_config(&self) -> Config { + Config::new( + None, + self.tls_config.clone(), *self.config.propose_timeout(), *self.config.wait_synced_timeout(), + self.is_raw_curp, ) } @@ -459,12 +463,11 @@ impl ClientBuilder { &self, ) -> Result + Send + Sync + 'static, tonic::Status> { - let state = Arc::new(self.init_state_builder().build()); let keep_alive = KeepAlive::new(*self.config.keep_alive_interval()); // TODO: build the fetch object let fetch = Fetch::default(); let client = Retry::new( - Unary::new(Arc::clone(&state), self.init_unary_config()), + Unary::new(self.init_unary_config()), self.init_retry_config(), keep_alive, fetch, @@ -510,16 +513,11 @@ impl ClientBuilderWithBypass

{ pub fn build( self, ) -> Result, tonic::Status> { - let state = self - .inner - .init_state_builder() - .build_bypassed::

(self.local_server_id, self.local_server); - let state = Arc::new(state); let keep_alive = KeepAlive::new(*self.inner.config.keep_alive_interval()); // TODO: build the fetch object let fetch = Fetch::default(); let client = Retry::new( - Unary::new(Arc::clone(&state), self.inner.init_unary_config()), + Unary::new(self.inner.init_unary_config()), self.inner.init_retry_config(), keep_alive, fetch, diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 0769cf73f..2737b6f01 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -1,15 +1,14 @@ /// Client propose implementation mod propose_impl; -use std::{cmp::Ordering, marker::PhantomData, sync::Arc, time::Duration}; +use std::{cmp::Ordering, marker::PhantomData, time::Duration}; use async_trait::async_trait; use curp_external_api::cmd::Command; -use futures::StreamExt; use tonic::Response; use tracing::{debug, warn}; -use super::{retry::Context, state::State, ProposeResponse, RepeatableClientApi}; +use super::{config::Config, retry::Context, ProposeResponse, RepeatableClientApi}; use crate::{ members::ServerId, quorum, @@ -44,19 +43,16 @@ impl UnaryConfig { /// The unary client #[derive(Debug)] pub(super) struct Unary { - /// Client state - state: Arc, /// Unary config - config: UnaryConfig, + config: Config, /// marker phantom: PhantomData, } impl Unary { /// Create an unary client - pub(super) fn new(state: Arc, config: UnaryConfig) -> Self { + pub(super) fn new(config: Config) -> Self { Self { - state, config, phantom: PhantomData, } @@ -97,9 +93,9 @@ impl RepeatableClientApi for Unary { let req = ProposeConfChangeRequest::new( ctx.propose_id(), changes, - self.state.cluster_version().await, + ctx.cluster_state().cluster_version(), ); - let timeout = self.config.wait_synced_timeout; + let timeout = self.config.wait_synced_timeout(); let members = ctx .cluster_state() .map_leader(|conn| async move { conn.propose_conf_change(req, 
timeout).await }) @@ -112,8 +108,8 @@ impl RepeatableClientApi for Unary { /// Send propose to shutdown cluster async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error> { - let req = ShutdownRequest::new(ctx.propose_id(), self.state.cluster_version().await); - let timeout = self.config.wait_synced_timeout; + let req = ShutdownRequest::new(ctx.propose_id(), ctx.cluster_state().cluster_version()); + let timeout = self.config.wait_synced_timeout(); let _resp = ctx .cluster_state() .map_leader(|conn| async move { conn.shutdown(req, timeout).await }) @@ -131,7 +127,7 @@ impl RepeatableClientApi for Unary { ctx: Context, ) -> Result<(), Self::Error> { let req = PublishRequest::new(ctx.propose_id(), node_id, node_name, node_client_urls); - let timeout = self.config.wait_synced_timeout; + let timeout = self.config.wait_synced_timeout(); let _resp = ctx .cluster_state() .map_leader(|conn| async move { conn.publish(req, timeout).await }) @@ -142,8 +138,8 @@ impl RepeatableClientApi for Unary { /// Send move leader request async fn move_leader(&self, node_id: u64, ctx: Context) -> Result<(), Self::Error> { - let req = MoveLeaderRequest::new(node_id, self.state.cluster_version().await); - let timeout = self.config.wait_synced_timeout; + let req = MoveLeaderRequest::new(node_id, ctx.cluster_state().cluster_version()); + let timeout = self.config.wait_synced_timeout(); let _resp = ctx .cluster_state() .map_leader(|conn| async move { conn.move_leader(req, timeout).await }) @@ -160,13 +156,13 @@ impl RepeatableClientApi for Unary { ) -> Result { // Same as fast_round, we blame the serializing error to the server even // thought it is the local error - let req = FetchReadStateRequest::new(cmd, self.state.cluster_version().await).map_err( + let req = FetchReadStateRequest::new(cmd, ctx.cluster_state().cluster_version()).map_err( |ser_err| { warn!("serializing error: {ser_err}"); CurpError::from(ser_err) }, )?; - let timeout = self.config.wait_synced_timeout; + let 
timeout = self.config.wait_synced_timeout(); let state = ctx .cluster_state() .map_leader(|conn| async move { conn.fetch_read_state(req, timeout).await }) @@ -187,7 +183,7 @@ impl RepeatableClientApi for Unary { linearizable: bool, _ctx: Context, ) -> Result { - let timeout = self.config.wait_synced_timeout; + let timeout = self.config.wait_synced_timeout(); if !linearizable { // firstly, try to fetch the local server if let Some(connect) = self.state.local_connect().await { diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs index 02a59b468..f26c0a69c 100644 --- a/crates/curp/src/client/unary/propose_impl.rs +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -142,16 +142,16 @@ impl Unary { token: Option<&String>, ctx: &Context, ) -> Result, CurpError> { - let term = self.state.term().await; + let term = ctx.cluster_state().term(); let propose_req = ProposeRequest::new::( ctx.propose_id(), cmd, - self.state.cluster_version().await, + ctx.cluster_state().cluster_version(), term, !use_fast_path, ctx.first_incomplete(), ); - let timeout = self.config.propose_timeout; + let timeout = self.config.propose_timeout(); let token = token.cloned(); let stream = ctx .cluster_state() @@ -169,11 +169,10 @@ impl Unary { /// /// Returns `true` if the read index is successful async fn send_read_index(&self, ctx: &Context) -> bool { - let term = self.state.term().await; - let connects_len = self.state.connects_len().await; - let quorum = quorum(connects_len); + let term = ctx.cluster_state().term(); + let quorum = ctx.cluster_state().get_quorum(quorum); let expect = quorum.wrapping_sub(1); - let timeout = self.config.propose_timeout; + let timeout = self.config.propose_timeout(); ctx.cluster_state() .for_each_follower(|conn| async move { conn.read_index(timeout).await }) @@ -189,9 +188,8 @@ impl Unary { /// /// Returns a stream that yield a single event async fn send_record(&self, cmd: &C, ctx: &Context) -> EventStream<'_, C> { 
- let connects_len = self.state.connects_len().await; - let superquorum = super_quorum(connects_len); - let timeout = self.config.propose_timeout; + let superquorum = ctx.cluster_state().get_quorum(super_quorum); + let timeout = self.config.propose_timeout(); let record_req = RecordRequest::new::(ctx.propose_id(), cmd); let expect = superquorum.wrapping_sub(1); let stream = ctx From 8951af978acde9b350403105ccbefe9f8cbc1083 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 10:21:44 +0800 Subject: [PATCH 099/322] refactor: use `Fetch` to implement `fetch_cluster` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 5 + crates/curp/src/client/fetch.rs | 4 +- crates/curp/src/client/mod.rs | 10 -- crates/curp/src/client/retry.rs | 9 +- crates/curp/src/client/unary/mod.rs | 118 +----------------------- 5 files changed, 17 insertions(+), 129 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index b3150c12e..5af95176a 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -115,4 +115,9 @@ impl ClusterState { pub(crate) fn cluster_version(&self) -> u64 { self.cluster_version } + + /// Returns the leader id + pub(crate) fn leader_id(&self) -> u64 { + self.leader + } } diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index e9af6d884..2630a6f31 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -32,7 +32,7 @@ impl Fetch { pub(crate) async fn fetch_cluster( &self, state: ClusterState, - ) -> Result { + ) -> Result<(ClusterState, FetchClusterResponse), CurpError> { /// Retry interval const FETCH_RETRY_INTERVAL: Duration = Duration::from_secs(1); loop { @@ -51,7 +51,7 @@ impl Fetch { new_connects, ); if self.fetch_term(&new_state).await { - return Ok(new_state); + return Ok((new_state, resp)); } warn!("Fetch 
cluster failed, sleep for {FETCH_RETRY_INTERVAL:?}"); tokio::time::sleep(FETCH_RETRY_INTERVAL).await; diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index e728a0013..cbbee265c 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -228,16 +228,6 @@ trait RepeatableClientApi { cmd: &Self::Cmd, ctx: Context, ) -> Result; - - /// Send fetch cluster requests to all servers (That's because initially, we didn't - /// know who the leader is.) - /// - /// Note: The fetched cluster may still be outdated if `linearizable` is false - async fn fetch_cluster( - &self, - linearizable: bool, - ctx: Context, - ) -> Result; } /// Client builder to build a client diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 56d991315..e329ecb60 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -289,7 +289,7 @@ where | CurpError::WrongClusterVersion(()) | CurpError::Redirect(_) // FIXME: The redirect error needs to include full cluster state | CurpError::Zombie(()) => { - let new_cluster_state = self.fetch.fetch_cluster(cluster_state).await?; + let (new_cluster_state, _) = self.fetch.fetch_cluster(cluster_state).await?; // TODO: Prevent concurrent updating cluster state *self.cluster_state.write() = new_cluster_state; } @@ -388,8 +388,11 @@ where &self, linearizable: bool, ) -> Result { - self.retry::<_, _>(|client, ctx| client.fetch_cluster(linearizable, ctx)) - .await + self.retry::<_, _>(|client, ctx| async move { + let (_, resp) = self.fetch.fetch_cluster(ctx.cluster_state()).await?; + Ok(resp) + }) + .await } } diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 2737b6f01..a99bdbbdf 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -1,21 +1,18 @@ /// Client propose implementation mod propose_impl; -use std::{cmp::Ordering, marker::PhantomData, time::Duration}; +use 
std::{marker::PhantomData, time::Duration}; use async_trait::async_trait; use curp_external_api::cmd::Command; -use tonic::Response; -use tracing::{debug, warn}; +use tracing::warn; use super::{config::Config, retry::Context, ProposeResponse, RepeatableClientApi}; use crate::{ members::ServerId, - quorum, rpc::{ - ConfChange, CurpError, FetchClusterRequest, FetchClusterResponse, FetchReadStateRequest, - Member, MoveLeaderRequest, ProposeConfChangeRequest, PublishRequest, ReadState, - ShutdownRequest, + ConfChange, CurpError, FetchReadStateRequest, Member, MoveLeaderRequest, + ProposeConfChangeRequest, PublishRequest, ReadState, ShutdownRequest, }, }; @@ -173,111 +170,4 @@ impl RepeatableClientApi for Unary { Ok(state) } - - /// Send fetch cluster requests to all servers (That's because initially, we didn't - /// know who the leader is.) - /// - /// Note: The fetched cluster may still be outdated if `linearizable` is false - async fn fetch_cluster( - &self, - linearizable: bool, - _ctx: Context, - ) -> Result { - let timeout = self.config.wait_synced_timeout(); - if !linearizable { - // firstly, try to fetch the local server - if let Some(connect) = self.state.local_connect().await { - /// local timeout, in fact, local connect should only be bypassed, so the timeout maybe unused. - const FETCH_LOCAL_TIMEOUT: Duration = Duration::from_secs(1); - - let resp = connect - .fetch_cluster(FetchClusterRequest::default(), FETCH_LOCAL_TIMEOUT) - .await? 
- .into_inner(); - debug!("fetch local cluster {resp:?}"); - - return Ok(resp); - } - } - // then fetch the whole cluster - let mut responses = self - .state - .for_each_server(|conn| async move { - ( - conn.id(), - conn.fetch_cluster(FetchClusterRequest { linearizable }, timeout) - .await - .map(Response::into_inner), - ) - }) - .await; - let quorum = quorum(responses.len()); - - let mut max_term = 0; - let mut res = None; - let mut ok_cnt = 0; - let mut err: Option = None; - - while let Some((id, resp)) = responses.next().await { - let inner = match resp { - Ok(r) => r, - Err(e) => { - warn!("fetch cluster from {} failed, {:?}", id, e); - // similar to fast round - if e.should_abort_fast_round() { - return Err(e); - } - if let Some(old_err) = err.as_ref() { - if old_err.priority() <= e.priority() { - err = Some(e); - } - } else { - err = Some(e); - } - continue; - } - }; - // Ignore the response of a node that doesn't know who the leader is. - if inner.leader_id.is_some() { - #[allow(clippy::arithmetic_side_effects)] - match max_term.cmp(&inner.term) { - Ordering::Less => { - max_term = inner.term; - if !inner.members.is_empty() { - res = Some(inner); - } - // reset ok count to 1 - ok_cnt = 1; - } - Ordering::Equal => { - if !inner.members.is_empty() { - res = Some(inner); - } - ok_cnt += 1; - } - Ordering::Greater => {} - } - } - // first check quorum - if ok_cnt >= quorum { - // then check if we got the response - if let Some(res) = res { - debug!("fetch cluster succeeded, result: {res:?}"); - if let Err(e) = self.state.check_and_update(&res).await { - warn!("update to a new cluster state failed, error {e}"); - } - return Ok(res); - } - debug!("fetch cluster quorum ok, but members are empty"); - } - debug!("fetch cluster from {id} success"); - } - - if let Some(err) = err { - return Err(err); - } - - // It seems that the max term has not reached the majority here. Mock a transport error and return it to the external to retry. 
- return Err(CurpError::RpcTransport(())); - } } From 1acba907532af31a1cefb125cfe054c0b3b1e4cf Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 10:25:40 +0800 Subject: [PATCH 100/322] chore: allow unused error handling methods Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/rpc/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 10c56fa99..d561e27bf 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -604,6 +604,7 @@ impl PublishRequest { } } +#[allow(unused)] // TODO: Use the error handling methods /// NOTICE: /// /// Please check test case `test_unary_fast_round_return_early_err` @@ -682,7 +683,6 @@ impl CurpError { } /// Whether to abort slow round early - #[allow(unused)] pub(crate) fn should_abort_slow_round(&self) -> bool { matches!( *self, From 72859ca9c4d5535f4525a4f7ec48dab3418890e6 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 10:26:57 +0800 Subject: [PATCH 101/322] chore: remove unused `UnaryConfig` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary/mod.rs | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index a99bdbbdf..5c676672c 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -1,7 +1,7 @@ /// Client propose implementation mod propose_impl; -use std::{marker::PhantomData, time::Duration}; +use std::marker::PhantomData; use async_trait::async_trait; use curp_external_api::cmd::Command; @@ -16,27 +16,6 @@ use crate::{ }, }; -/// The unary client config -#[derive(Debug)] -pub(super) struct UnaryConfig { - /// The rpc timeout of a propose request - propose_timeout: Duration, - /// The rpc timeout of a 2-RTT 
request, usually takes longer than propose timeout - /// - /// The recommended the values is within (propose_timeout, 2 * propose_timeout]. - wait_synced_timeout: Duration, -} - -impl UnaryConfig { - /// Create a unary config - pub(super) fn new(propose_timeout: Duration, wait_synced_timeout: Duration) -> Self { - Self { - propose_timeout, - wait_synced_timeout, - } - } -} - /// The unary client #[derive(Debug)] pub(super) struct Unary { From f8bc066f4c67cf8c0317c8fcb6bae807e0c8bb01 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 10:29:12 +0800 Subject: [PATCH 102/322] chore: remove unused async and result in `propose_impl` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary/propose_impl.rs | 28 ++++++++------------ 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs index f26c0a69c..4a94255b8 100644 --- a/crates/curp/src/client/unary/propose_impl.rs +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -45,9 +45,7 @@ impl Unary { use_fast_path: bool, ctx: &Context, ) -> Result, CurpError> { - let stream = self - .send_propose_mutative(cmd, use_fast_path, token, ctx) - .await?; + let stream = self.send_propose_mutative(cmd, use_fast_path, token, ctx); let mut stream = Box::into_pin(stream); let first_two_events = ( Self::next_event(&mut stream).await?, @@ -84,9 +82,7 @@ impl Unary { use_fast_path: bool, ctx: &Context, ) -> Result, CurpError> { - let stream = self - .send_leader_propose(cmd, use_fast_path, token, ctx) - .await?; + let stream = self.send_leader_propose(cmd, use_fast_path, token, ctx); let mut stream_pinned = Box::into_pin(stream); if !self.send_read_index(ctx).await { return Err(CurpError::WrongClusterVersion(())); @@ -118,30 +114,28 @@ impl Unary { /// Send propose to the cluster /// /// Returns a stream that combines the propose stream and record 
request - async fn send_propose_mutative( + fn send_propose_mutative( &self, cmd: &C, use_fast_path: bool, token: Option<&String>, ctx: &Context, - ) -> Result, CurpError> { - let leader_stream = self - .send_leader_propose(cmd, use_fast_path, token, ctx) - .await?; - let follower_stream = self.send_record(cmd, ctx).await; + ) -> EventStream<'_, C> { + let leader_stream = self.send_leader_propose(cmd, use_fast_path, token, ctx); + let follower_stream = self.send_record(cmd, ctx); let select = stream::select(Box::into_pin(leader_stream), Box::into_pin(follower_stream)); - Ok(Box::new(select)) + Box::new(select) } /// Send propose request to the leader - async fn send_leader_propose( + fn send_leader_propose( &self, cmd: &C, use_fast_path: bool, token: Option<&String>, ctx: &Context, - ) -> Result, CurpError> { + ) -> EventStream<'_, C> { let term = ctx.cluster_state().term(); let propose_req = ProposeRequest::new::( ctx.propose_id(), @@ -162,7 +156,7 @@ impl Unary { .map(Box::into_pin) .flatten_stream(); - Ok(Box::new(stream)) + Box::new(stream) } /// Send read index requests to the cluster @@ -187,7 +181,7 @@ impl Unary { /// Send record requests to the cluster /// /// Returns a stream that yield a single event - async fn send_record(&self, cmd: &C, ctx: &Context) -> EventStream<'_, C> { + fn send_record(&self, cmd: &C, ctx: &Context) -> EventStream<'_, C> { let superquorum = ctx.cluster_state().get_quorum(super_quorum); let timeout = self.config.propose_timeout(); let record_req = RecordRequest::new::(ctx.propose_id(), cmd); From ae366fe9b1cfa7a52e281b46d80591bd34342a73 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 11:39:13 +0800 Subject: [PATCH 103/322] refactor: add override connect to `Fetch` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/fetch.rs | 49 +++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git 
a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 2630a6f31..b935ec231 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -15,17 +15,31 @@ use crate::{ use super::cluster_state::ClusterState; use super::config::Config; +/// An override connect +type OverrideConnect = (u64, Arc); + /// Fetch cluster implementation -#[derive(Debug, Default, Clone)] +#[derive(Default, Clone)] pub(crate) struct Fetch { /// The fetch config config: Config, + /// Override connect + override_connects: Vec, } impl Fetch { /// Creates a new `Fetch` pub(crate) fn new(config: Config) -> Self { - Self { config } + Self { + config, + override_connects: Vec::new(), + } + } + + /// Add an override connect to fetch cluster response + pub(crate) fn with_override(mut self, connect: OverrideConnect) -> Self { + self.override_connects.push(connect); + self } /// Fetch cluster and updates the current state @@ -42,6 +56,7 @@ impl Fetch { .ok_or(CurpError::internal("cluster not available"))?; let new_members = self.member_addrs(&resp); let new_connects = self.connect_to(new_members); + let new_connects = self.override_connects(new_connects); let new_state = ClusterState::new( resp.leader_id .unwrap_or_else(|| unreachable!("leader id should be Some")) @@ -119,4 +134,34 @@ impl Fetch { }) .collect() } + + /// Overrides the connects + fn override_connects( + &self, + mut connects: HashMap>, + ) -> HashMap> { + for &(id, ref c) in &self.override_connects { + if connects.insert(id, Arc::clone(c)).is_none() { + warn!("override an non-existing connect with id: {id}"); + } + } + + connects + } +} + +impl std::fmt::Debug for Fetch { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Fetch") + .field("config", &self.config) + .field( + "override_connects", + &self + .override_connects + .iter() + .map(|&(id, _)| id) + .collect::>(), + ) + .finish() + } } From fbe7275c18ccfe865265671ce6a41d29509999f5 Mon Sep 17 00:00:00 
2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 11:51:43 +0800 Subject: [PATCH 104/322] refactor: client build config Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/keep_alive.rs | 2 +- crates/curp/src/client/mod.rs | 54 +++++++++++++--------------- 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 3bcc77a04..798ca3381 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -12,7 +12,7 @@ use parking_lot::RwLock; use tokio::{sync::broadcast, task::JoinHandle}; use tracing::{debug, info, warn}; -use super::{cluster_state::ClusterState, state::State}; +use super::cluster_state::ClusterState; use crate::rpc::{connect::ConnectApi, CurpError, Redirect}; /// Keep alive diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index cbbee265c..324a4c25f 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -43,7 +43,7 @@ mod tests; #[cfg(madsim)] use std::sync::atomic::AtomicU64; -use std::{collections::HashMap, fmt::Debug, ops::Deref, time::Duration}; +use std::{collections::HashMap, fmt::Debug, ops::Deref, sync::Arc, time::Duration}; use async_trait::async_trait; use curp_external_api::cmd::Command; @@ -61,14 +61,15 @@ use self::{ fetch::Fetch, keep_alive::KeepAlive, retry::{Context, Retry, RetryConfig}, - state::StateBuilder, unary::Unary, }; use crate::{ members::ServerId, rpc::{ - protocol_client::ProtocolClient, ConfChange, FetchClusterRequest, FetchClusterResponse, - Member, ProposeId, Protocol, ReadState, + connect::{BypassedConnect, ConnectApi}, + protocol_client::ProtocolClient, + ConfChange, FetchClusterRequest, FetchClusterResponse, Member, ProposeId, Protocol, + ReadState, }, tracker::Tracker, }; @@ -398,24 +399,6 @@ impl ClientBuilder { .ok_or(tonic::Status::unavailable("cluster not published")) } - /// 
Init state builder - fn init_state_builder(&self) -> StateBuilder { - let mut builder = StateBuilder::new( - self.all_members.clone().unwrap_or_else(|| { - unreachable!("must set the initial members or discover from some endpoints") - }), - self.tls_config.clone(), - ); - if let Some(version) = self.cluster_version { - builder.set_cluster_version(version); - } - if let Some((id, term)) = self.leader_state { - builder.set_leader_state(id, term); - } - builder.set_is_raw_curp(self.is_raw_curp); - builder - } - /// Init retry config fn init_retry_config(&self) -> RetryConfig { if *self.config.fixed_backoff() { @@ -433,9 +416,9 @@ impl ClientBuilder { } /// Init unary config - fn init_unary_config(&self) -> Config { + fn init_config(&self, local_server_id: Option) -> Config { Config::new( - None, + local_server_id, self.tls_config.clone(), *self.config.propose_timeout(), *self.config.wait_synced_timeout(), @@ -453,11 +436,11 @@ impl ClientBuilder { &self, ) -> Result + Send + Sync + 'static, tonic::Status> { + let config = self.init_config(None); let keep_alive = KeepAlive::new(*self.config.keep_alive_interval()); - // TODO: build the fetch object - let fetch = Fetch::default(); + let fetch = Fetch::new(config.clone()); let client = Retry::new( - Unary::new(self.init_unary_config()), + Unary::new(config), self.init_retry_config(), keep_alive, fetch, @@ -494,6 +477,16 @@ impl ClientBuilder { } impl ClientBuilderWithBypass

{ + /// Build the state with local server + pub(super) fn bypassed_connect( + local_server_id: ServerId, + local_server: P, + ) -> (u64, Arc) { + debug!("client bypassed server({local_server_id})"); + let connect = BypassedConnect::new(local_server_id, local_server); + (local_server_id, Arc::new(connect)) + } + /// Build the client with local server /// /// # Errors @@ -503,11 +496,12 @@ impl ClientBuilderWithBypass

{ pub fn build( self, ) -> Result, tonic::Status> { + let bypassed = Self::bypassed_connect(self.local_server_id, self.local_server); + let config = self.inner.init_config(Some(self.local_server_id)); let keep_alive = KeepAlive::new(*self.inner.config.keep_alive_interval()); - // TODO: build the fetch object - let fetch = Fetch::default(); + let fetch = Fetch::new(config.clone()).with_override(bypassed); let client = Retry::new( - Unary::new(self.inner.init_unary_config()), + Unary::new(config), self.inner.init_retry_config(), keep_alive, fetch, From 926ba69b6c710286d5a02b7ff4710bfe3f8908bd Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 15:46:28 +0800 Subject: [PATCH 105/322] refactor: use an enum for cluster state Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 84 +++++++++++++++++++++---- 1 file changed, 73 insertions(+), 11 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 5af95176a..594022309 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -4,13 +4,74 @@ use futures::{stream::FuturesUnordered, Future}; use crate::{ members::ServerId, - rpc::{connect::ConnectApi, CurpError}, + rpc::{connect::ConnectApi, connects, CurpError}, }; +/// Take an async function and map to all server, returning `FuturesUnordered` +pub(crate) trait ForEachServer { + /// Take an async function and map to all server, returning `FuturesUnordered` + fn for_each_server>( + &self, + f: impl FnMut(Arc) -> F, + ) -> FuturesUnordered; +} + +/// Cluster State +#[derive(Debug, Clone)] +pub(crate) enum ClusterStateSuper { + /// Initial cluster state + Init(ClusterStateInit), + /// Ready cluster state + Ready(ClusterState), +} + +impl ForEachServer for ClusterStateSuper { + fn for_each_server>( + &self, + f: impl FnMut(Arc) -> F, + ) -> FuturesUnordered { + match *self { 
+ ClusterStateSuper::Init(ref init) => init.for_each_server(f), + ClusterStateSuper::Ready(ref ready) => ready.for_each_server(f), + } + } +} + +/// Initial cluster state +#[derive(Clone)] +pub(crate) struct ClusterStateInit { + /// Member connects + connects: Vec>, +} + +impl ClusterStateInit { + /// Creates a new `ClusterStateInit` + pub(crate) fn new(connects: Vec>) -> Self { + Self { connects } + } +} + +impl ForEachServer for ClusterStateInit { + fn for_each_server>( + &self, + f: impl FnMut(Arc) -> F, + ) -> FuturesUnordered { + self.connects.clone().into_iter().map(f).collect() + } +} + +impl std::fmt::Debug for ClusterStateInit { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ClusterStateInit") + .field("connects_len", &self.connects.len()) + .finish() + } +} + /// The cluster state /// /// The client must discover the cluster info before sending any propose -#[derive(Default, Clone)] +#[derive(Clone, Default)] pub(crate) struct ClusterState { /// Leader id. leader: ServerId, @@ -33,6 +94,15 @@ impl std::fmt::Debug for ClusterState { } } +impl ForEachServer for ClusterState { + fn for_each_server>( + &self, + f: impl FnMut(Arc) -> F, + ) -> FuturesUnordered { + self.connects.values().map(Arc::clone).map(f).collect() + } +} + impl ClusterState { /// Creates a new `ClusterState` pub(crate) fn new( @@ -72,18 +142,10 @@ impl ClusterState { // an inconsistency between the client's local leader state and the cluster // state, then mock a `WrongClusterVersion` return to the outside. 
f(Arc::clone(self.connects.get(&self.leader).unwrap_or_else( - || unreachable!("leader connect should always exists"), + || unreachable!("leader should always exist"), ))) } - /// Take an async function and map to all server, returning `FuturesUnordered` - pub(crate) fn for_each_server>( - &self, - f: impl FnMut(Arc) -> F, - ) -> FuturesUnordered { - self.connects.values().map(Arc::clone).map(f).collect() - } - /// Take an async function and map to all server, returning `FuturesUnordered` pub(crate) fn for_each_follower>( &self, From c60330e15135241851ad60e984ff94e1b2494981 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 18:14:18 +0800 Subject: [PATCH 106/322] refactor: implement initial cluster state fetch mechanism Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/fetch.rs | 6 +-- crates/curp/src/client/keep_alive.rs | 27 +++++++++--- crates/curp/src/client/mod.rs | 19 +++++++++ crates/curp/src/client/retry.rs | 61 ++++++++++++++++++++++++---- 4 files changed, 96 insertions(+), 17 deletions(-) diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index b935ec231..e3844ecd4 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -12,7 +12,7 @@ use crate::{ rpc::{self, connect::ConnectApi, CurpError, FetchClusterRequest, FetchClusterResponse}, }; -use super::cluster_state::ClusterState; +use super::cluster_state::{ClusterState, ClusterStateSuper, ForEachServer}; use super::config::Config; /// An override connect @@ -45,7 +45,7 @@ impl Fetch { /// Fetch cluster and updates the current state pub(crate) async fn fetch_cluster( &self, - state: ClusterState, + state: impl ForEachServer, ) -> Result<(ClusterState, FetchClusterResponse), CurpError> { /// Retry interval const FETCH_RETRY_INTERVAL: Duration = Duration::from_secs(1); @@ -94,7 +94,7 @@ impl Fetch { /// Prefetch, send fetch cluster request to the cluster and get 
the /// config with the greatest quorum. - async fn pre_fetch(&self, state: &ClusterState) -> Option { + async fn pre_fetch(&self, state: &impl ForEachServer) -> Option { let timeout = self.config.wait_synced_timeout(); let requests = state.for_each_server(|c| async move { c.fetch_cluster(FetchClusterRequest { linearizable: true }, timeout) diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 798ca3381..a45169b61 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -7,12 +7,19 @@ use std::{ }; use event_listener::Event; -use futures::Future; +use futures::{ + future::{self, OptionFuture}, + Future, FutureExt, +}; use parking_lot::RwLock; use tokio::{sync::broadcast, task::JoinHandle}; use tracing::{debug, info, warn}; -use super::cluster_state::ClusterState; +use super::{ + cluster_state::{ClusterState, ClusterStateSuper}, + fetch::Fetch, + retry::ClusterStateShared, +}; use crate::rpc::{connect::ConnectApi, CurpError, Redirect}; /// Keep alive @@ -55,7 +62,7 @@ impl KeepAlive { /// Streaming keep alive pub(crate) fn spawn_keep_alive( self, - cluster_state: Arc>, + cluster_state: Arc, ) -> KeepAliveHandle { /// Sleep duration when keep alive failed const FAIL_SLEEP_DURATION: Duration = Duration::from_secs(1); @@ -65,9 +72,19 @@ impl KeepAlive { let update_event_c = Arc::clone(&update_event); let handle = tokio::spawn(async move { loop { - let current_state = cluster_state.read().clone(); + let fetch_result = cluster_state.ready_or_fetch().await; + let current_state = match fetch_result { + Ok(ready) => ready, + Err(e) => { + warn!("fetch cluster failed: {e:?}"); + // Sleep for some time, the cluster state should be updated in a while + tokio::time::sleep(FAIL_SLEEP_DURATION).await; + continue; + } + }; let current_id = client_id.load(Ordering::Relaxed); - match self.keep_alive_with(current_id, current_state).await { + let result = self.keep_alive_with(current_id, 
current_state).await; + match result { Ok(new_id) => { client_id.store(new_id, Ordering::Relaxed); let _ignore = update_event.notify(usize::MAX); diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 324a4c25f..903edc7cf 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -57,6 +57,7 @@ use utils::ClientTlsConfig; use utils::{build_endpoint, config::ClientConfig}; use self::{ + cluster_state::ClusterStateInit, config::Config, fetch::Fetch, keep_alive::KeepAlive, @@ -66,6 +67,7 @@ use self::{ use crate::{ members::ServerId, rpc::{ + self, connect::{BypassedConnect, ConnectApi}, protocol_client::ProtocolClient, ConfChange, FetchClusterRequest, FetchClusterResponse, Member, ProposeId, Protocol, @@ -426,6 +428,19 @@ impl ClientBuilder { ) } + /// Connect to members + fn connect_members(&self, tls_config: Option<&ClientTlsConfig>) -> ClusterStateInit { + let all_members = self + .all_members + .clone() + .unwrap_or_else(|| unreachable!("requires members")); + let connects = rpc::connects(all_members, tls_config) + .map(|(_id, conn)| conn) + .collect(); + + ClusterStateInit::new(connects) + } + /// Build the client /// /// # Errors @@ -439,11 +454,13 @@ impl ClientBuilder { let config = self.init_config(None); let keep_alive = KeepAlive::new(*self.config.keep_alive_interval()); let fetch = Fetch::new(config.clone()); + let cluster_state_init = self.connect_members(self.tls_config.as_ref()); let client = Retry::new( Unary::new(config), self.init_retry_config(), keep_alive, fetch, + cluster_state_init, ); Ok(client) @@ -500,11 +517,13 @@ impl ClientBuilderWithBypass

{ let config = self.inner.init_config(Some(self.local_server_id)); let keep_alive = KeepAlive::new(*self.inner.config.keep_alive_interval()); let fetch = Fetch::new(config.clone()).with_override(bypassed); + let cluster_state_init = self.inner.connect_members(self.inner.tls_config.as_ref()); let client = Retry::new( Unary::new(config), self.inner.init_retry_config(), keep_alive, fetch, + cluster_state_init, ); Ok(client) diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index e329ecb60..ca703ba28 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -7,17 +7,18 @@ use std::{ use async_trait::async_trait; use futures::Future; use parking_lot::RwLock; -use tracing::warn; +use tracing::{debug, warn}; use super::{ - cluster_state::ClusterState, + cluster_state::{ClusterState, ClusterStateInit, ClusterStateSuper}, + config::Config, fetch::Fetch, keep_alive::{KeepAlive, KeepAliveHandle}, ClientApi, ProposeIdGuard, ProposeResponse, RepeatableClientApi, }; use crate::{ members::ServerId, - rpc::{ConfChange, CurpError, FetchClusterResponse, Member, ProposeId, ReadState}, + rpc::{connects, ConfChange, CurpError, FetchClusterResponse, Member, ProposeId, ReadState}, tracker::Tracker, }; @@ -174,6 +175,46 @@ impl CmdTracker { } } +/// A shared cluster state +#[derive(Debug)] +pub(crate) struct ClusterStateShared { + /// Inner state + inner: RwLock, + /// Fetch cluster object + fetch: Fetch, +} + +impl ClusterStateShared { + /// Creates a new `ClusterStateShared` + fn new(inner: ClusterStateSuper, fetch: Fetch) -> Self { + Self { + inner: RwLock::new(inner), + fetch, + } + } + + /// Fetch and updates current state + /// + /// Returns the fetched cluster state + pub(crate) async fn fetch_and_update(&self) -> Result { + let current = self.inner.read().clone(); + let (new_state, _) = self.fetch.fetch_cluster(current).await?; + *self.inner.write() = ClusterStateSuper::Ready(new_state.clone()); + debug!("cluster state 
updates to: {new_state:?}"); + + Ok(new_state) + } + + /// Retrieves the cluster state if it's ready, or fetches and updates it if not. + pub(crate) async fn ready_or_fetch(&self) -> Result { + let current = self.inner.read().clone(); + match current { + ClusterStateSuper::Init(init) => self.fetch_and_update().await, + ClusterStateSuper::Ready(ready) => Ok(ready), + } + } +} + /// The retry client automatically retry the requests of the inner client api /// which raises the [`tonic::Status`] error #[derive(Debug)] @@ -183,7 +224,7 @@ pub(super) struct Retry { /// Retry config retry_config: RetryConfig, /// Cluster state - cluster_state: Arc>, + cluster_state: Arc, /// Keep alive client keep_alive: KeepAliveHandle, /// Fetch cluster object @@ -202,9 +243,12 @@ where retry_config: RetryConfig, keep_alive: KeepAlive, fetch: Fetch, + cluster_state_init: ClusterStateInit, ) -> Self { - // TODO: build state from parameters - let cluster_state = Arc::new(RwLock::default()); + let cluster_state = Arc::new(ClusterStateShared::new( + ClusterStateSuper::Init(cluster_state_init), + fetch.clone(), + )); let keep_alive_handle = keep_alive.spawn_keep_alive(Arc::clone(&cluster_state)); Self { inner, @@ -230,7 +274,7 @@ where let propose_id_guard = self.tracker.gen_propose_id(client_id); let first_incomplete = self.tracker.first_incomplete(); while let Some(delay) = backoff.next_delay() { - let cluster_state = self.cluster_state.read().clone(); + let cluster_state = self.cluster_state.ready_or_fetch().await?; let context = Context::new(*propose_id_guard, first_incomplete, cluster_state.clone()); let result = tokio::select! 
{ result = f(&self.inner, context) => result, @@ -289,9 +333,8 @@ where | CurpError::WrongClusterVersion(()) | CurpError::Redirect(_) // FIXME: The redirect error needs to include full cluster state | CurpError::Zombie(()) => { - let (new_cluster_state, _) = self.fetch.fetch_cluster(cluster_state).await?; // TODO: Prevent concurrent updating cluster state - *self.cluster_state.write() = new_cluster_state; + let _ignore = self.cluster_state.fetch_and_update().await?; } } From bd30d1f8b394f3fe2c11226e1b7a9f8268cf9f72 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 18:16:21 +0800 Subject: [PATCH 107/322] chore: rename some types in `cluster_state.rs` Includes: * ClusterState -> ClusterStateReady * ClusterStateSuper -> ClusterState Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 26 ++++++++++++------------- crates/curp/src/client/fetch.rs | 8 ++++---- crates/curp/src/client/keep_alive.rs | 4 ++-- crates/curp/src/client/retry.rs | 26 ++++++++++++------------- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 594022309..2fb07d8c7 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -18,26 +18,28 @@ pub(crate) trait ForEachServer { /// Cluster State #[derive(Debug, Clone)] -pub(crate) enum ClusterStateSuper { +pub(crate) enum ClusterState { /// Initial cluster state Init(ClusterStateInit), /// Ready cluster state - Ready(ClusterState), + Ready(ClusterStateReady), } -impl ForEachServer for ClusterStateSuper { +impl ForEachServer for ClusterState { fn for_each_server>( &self, f: impl FnMut(Arc) -> F, ) -> FuturesUnordered { match *self { - ClusterStateSuper::Init(ref init) => init.for_each_server(f), - ClusterStateSuper::Ready(ref ready) => ready.for_each_server(f), + ClusterState::Init(ref init) => 
init.for_each_server(f), + ClusterState::Ready(ref ready) => ready.for_each_server(f), } } } -/// Initial cluster state +/// The initial cluster state +/// +/// The client must discover the cluster info before sending any propose #[derive(Clone)] pub(crate) struct ClusterStateInit { /// Member connects @@ -68,11 +70,9 @@ impl std::fmt::Debug for ClusterStateInit { } } -/// The cluster state -/// -/// The client must discover the cluster info before sending any propose +/// The cluster state that is ready for client propose #[derive(Clone, Default)] -pub(crate) struct ClusterState { +pub(crate) struct ClusterStateReady { /// Leader id. leader: ServerId, /// Term, initialize to 0, calibrated by the server. @@ -83,7 +83,7 @@ pub(crate) struct ClusterState { connects: HashMap>, } -impl std::fmt::Debug for ClusterState { +impl std::fmt::Debug for ClusterStateReady { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("State") .field("leader", &self.leader) @@ -94,7 +94,7 @@ impl std::fmt::Debug for ClusterState { } } -impl ForEachServer for ClusterState { +impl ForEachServer for ClusterStateReady { fn for_each_server>( &self, f: impl FnMut(Arc) -> F, @@ -103,7 +103,7 @@ impl ForEachServer for ClusterState { } } -impl ClusterState { +impl ClusterStateReady { /// Creates a new `ClusterState` pub(crate) fn new( leader: ServerId, diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index e3844ecd4..c0727416c 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -12,7 +12,7 @@ use crate::{ rpc::{self, connect::ConnectApi, CurpError, FetchClusterRequest, FetchClusterResponse}, }; -use super::cluster_state::{ClusterState, ClusterStateSuper, ForEachServer}; +use super::cluster_state::{ClusterState, ClusterStateReady, ForEachServer}; use super::config::Config; /// An override connect @@ -46,7 +46,7 @@ impl Fetch { pub(crate) async fn fetch_cluster( &self, state: impl ForEachServer, - ) -> 
Result<(ClusterState, FetchClusterResponse), CurpError> { + ) -> Result<(ClusterStateReady, FetchClusterResponse), CurpError> { /// Retry interval const FETCH_RETRY_INTERVAL: Duration = Duration::from_secs(1); loop { @@ -57,7 +57,7 @@ impl Fetch { let new_members = self.member_addrs(&resp); let new_connects = self.connect_to(new_members); let new_connects = self.override_connects(new_connects); - let new_state = ClusterState::new( + let new_state = ClusterStateReady::new( resp.leader_id .unwrap_or_else(|| unreachable!("leader id should be Some")) .into(), @@ -74,7 +74,7 @@ impl Fetch { } /// Fetch the term of the cluster. This ensures that the current leader is the latest. - async fn fetch_term(&self, state: &ClusterState) -> bool { + async fn fetch_term(&self, state: &ClusterStateReady) -> bool { let timeout = self.config.wait_synced_timeout(); let term = state.term(); let quorum = state.get_quorum(quorum); diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index a45169b61..4ce744be8 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -16,7 +16,7 @@ use tokio::{sync::broadcast, task::JoinHandle}; use tracing::{debug, info, warn}; use super::{ - cluster_state::{ClusterState, ClusterStateSuper}, + cluster_state::{ClusterState, ClusterStateReady}, fetch::Fetch, retry::ClusterStateShared, }; @@ -109,7 +109,7 @@ impl KeepAlive { pub(crate) async fn keep_alive_with( &self, client_id: u64, - cluster_state: ClusterState, + cluster_state: ClusterStateReady, ) -> Result { cluster_state .map_leader(|conn| async move { diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index ca703ba28..74c0d193b 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -10,7 +10,7 @@ use parking_lot::RwLock; use tracing::{debug, warn}; use super::{ - cluster_state::{ClusterState, ClusterStateInit, ClusterStateSuper}, + cluster_state::{ClusterState, 
ClusterStateInit, ClusterStateReady}, config::Config, fetch::Fetch, keep_alive::{KeepAlive, KeepAliveHandle}, @@ -114,7 +114,7 @@ pub(crate) struct Context { /// First incomplete seqence first_incomplete: u64, /// The current cluster state - cluster_state: ClusterState, + cluster_state: ClusterStateReady, } impl Context { @@ -122,7 +122,7 @@ impl Context { pub(crate) fn new( propose_id: ProposeId, first_incomplete: u64, - cluster_state: ClusterState, + cluster_state: ClusterStateReady, ) -> Self { Self { propose_id, @@ -142,7 +142,7 @@ impl Context { } /// Returns the current client id - pub(crate) fn cluster_state(&self) -> ClusterState { + pub(crate) fn cluster_state(&self) -> ClusterStateReady { self.cluster_state.clone() } } @@ -179,14 +179,14 @@ impl CmdTracker { #[derive(Debug)] pub(crate) struct ClusterStateShared { /// Inner state - inner: RwLock, + inner: RwLock, /// Fetch cluster object fetch: Fetch, } impl ClusterStateShared { /// Creates a new `ClusterStateShared` - fn new(inner: ClusterStateSuper, fetch: Fetch) -> Self { + fn new(inner: ClusterState, fetch: Fetch) -> Self { Self { inner: RwLock::new(inner), fetch, @@ -196,21 +196,21 @@ impl ClusterStateShared { /// Fetch and updates current state /// /// Returns the fetched cluster state - pub(crate) async fn fetch_and_update(&self) -> Result { + pub(crate) async fn fetch_and_update(&self) -> Result { let current = self.inner.read().clone(); let (new_state, _) = self.fetch.fetch_cluster(current).await?; - *self.inner.write() = ClusterStateSuper::Ready(new_state.clone()); + *self.inner.write() = ClusterState::Ready(new_state.clone()); debug!("cluster state updates to: {new_state:?}"); Ok(new_state) } /// Retrieves the cluster state if it's ready, or fetches and updates it if not. 
- pub(crate) async fn ready_or_fetch(&self) -> Result { + pub(crate) async fn ready_or_fetch(&self) -> Result { let current = self.inner.read().clone(); match current { - ClusterStateSuper::Init(init) => self.fetch_and_update().await, - ClusterStateSuper::Ready(ready) => Ok(ready), + ClusterState::Init(init) => self.fetch_and_update().await, + ClusterState::Ready(ready) => Ok(ready), } } } @@ -246,7 +246,7 @@ where cluster_state_init: ClusterStateInit, ) -> Self { let cluster_state = Arc::new(ClusterStateShared::new( - ClusterStateSuper::Init(cluster_state_init), + ClusterState::Init(cluster_state_init), fetch.clone(), )); let keep_alive_handle = keep_alive.spawn_keep_alive(Arc::clone(&cluster_state)); @@ -309,7 +309,7 @@ where async fn handle_err( &self, err: &CurpError, - cluster_state: ClusterState, + cluster_state: ClusterStateReady, ) -> Result<(), tonic::Status> { match *err { // some errors that should not retry From 03cecec13ed61d18aadff64bdf54ce64e63fda44 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 18:41:04 +0800 Subject: [PATCH 108/322] fix: reconnect keep alive hang Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/rpc/reconnect.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs index f92844234..a52af876a 100644 --- a/crates/curp/src/rpc/reconnect.rs +++ b/crates/curp/src/rpc/reconnect.rs @@ -45,7 +45,6 @@ impl Reconnect { // Cancel the leader keep alive loop task because it hold a read lock let _cancel = self.event.notify(1); let _ignore = self.connect.write().await.replace(new_connect); - let _continue = self.event.notify(1); } /// Try to reconnect if the result is `Err` @@ -177,12 +176,9 @@ impl ConnectApi for Reconnect { async fn lease_keep_alive(&self, client_id: u64, interval: Duration) -> Result { let connect = self.connect.read().await; let connect_ref = 
connect.as_ref().unwrap(); - let result = tokio::select! { + tokio::select! { result = connect_ref.lease_keep_alive(client_id, interval) => result, _empty = self.event.listen() => Err(CurpError::RpcTransport(())), - }; - // Wait for connection update - self.event.listen().await; - result + } } } From 9f322df2de49bab848eabdfb6ed92adc868ac537 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 20:03:10 +0800 Subject: [PATCH 109/322] feat: add `StreamingProtocol` for implement handling bypassed streaming request Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 3 ++- crates/curp/src/client/state.rs | 3 ++- crates/curp/src/rpc/connect.rs | 19 ++++++++++++------- crates/curp/src/server/mod.rs | 27 +++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 903edc7cf..0b9bf6f10 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -73,6 +73,7 @@ use crate::{ ConfChange, FetchClusterRequest, FetchClusterResponse, Member, ProposeId, Protocol, ReadState, }, + server::StreamingProtocol, tracker::Tracker, }; @@ -493,7 +494,7 @@ impl ClientBuilder { } } -impl ClientBuilderWithBypass

{ +impl ClientBuilderWithBypass

{ /// Build the state with local server pub(super) fn bypassed_connect( local_server_id: ServerId, diff --git a/crates/curp/src/client/state.rs b/crates/curp/src/client/state.rs index 8a9b53081..39cc7fea9 100644 --- a/crates/curp/src/client/state.rs +++ b/crates/curp/src/client/state.rs @@ -22,6 +22,7 @@ use crate::{ connect::{BypassedConnect, ConnectApi}, CurpError, FetchClusterRequest, FetchClusterResponse, Protocol, }, + server::StreamingProtocol, }; /// The client state @@ -420,7 +421,7 @@ impl StateBuilder { } /// Build the state with local server - pub(super) fn build_bypassed( + pub(super) fn build_bypassed( mut self, local_server_id: ServerId, local_server: P, diff --git a/crates/curp/src/rpc/connect.rs b/crates/curp/src/rpc/connect.rs index 68c8ae18d..89c1fc716 100644 --- a/crates/curp/src/rpc/connect.rs +++ b/crates/curp/src/rpc/connect.rs @@ -37,6 +37,7 @@ use crate::{ Protocol, PublishRequest, PublishResponse, ShutdownRequest, ShutdownResponse, TriggerShutdownRequest, TryBecomeLeaderNowRequest, VoteRequest, VoteResponse, }, + server::StreamingProtocol, snapshot::Snapshot, }; @@ -676,7 +677,7 @@ impl Bypass for tonic::metadata::MetadataMap { #[async_trait] impl ConnectApi for BypassedConnect where - T: Protocol, + T: Protocol + StreamingProtocol, { /// Get server id fn id(&self) -> ServerId { @@ -808,12 +809,16 @@ where } /// Keep send lease keep alive to server and mutate the client id - async fn lease_keep_alive( - &self, - _client_id: u64, - _interval: Duration, - ) -> Result { - unreachable!("cannot invoke lease_keep_alive in bypassed connect") + async fn lease_keep_alive(&self, client_id: u64, interval: Duration) -> Result { + let stream = heartbeat_stream(client_id, interval); + let new_id = StreamingProtocol::lease_keep_alive(&self.server, stream) + .await? 
+ .into_inner() + .client_id; + // The only place to update the client id for follower + info!("client_id update to {new_id}"); + + Ok(new_id) } } diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 4e6806495..d2dc8d99f 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -2,6 +2,7 @@ use std::{fmt::Debug, sync::Arc}; use engine::SnapshotAllocator; use flume::r#async::RecvStream; +use futures::{Stream, StreamExt}; use tokio::sync::broadcast; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; @@ -256,6 +257,32 @@ impl, RC: RoleChange> crate::rpc::InnerProtoc } } +/// Used for bypassed connect because the `Protocol` trait requires `tonic::Streaming` +/// as request type and there's no easy way to convert a Stream into that. +#[async_trait::async_trait] +pub trait StreamingProtocol { + /// Lease keep alive + async fn lease_keep_alive( + &self, + request: impl Stream + Send, + ) -> Result, tonic::Status>; +} + +#[async_trait::async_trait] +impl, RC: RoleChange> StreamingProtocol for Rpc { + #[instrument(skip_all, name = "lease_keep_alive")] + async fn lease_keep_alive( + &self, + request: impl Stream + Send, + ) -> Result, tonic::Status> { + let stream = request.map(Ok::<_, std::io::Error>); + + Ok(tonic::Response::new( + self.inner.lease_keep_alive(stream).await?, + )) + } +} + impl, RC: RoleChange> Rpc { /// New `Rpc` /// From 6345c966e0a3772fb8a57263a22332598807da15 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 20:11:28 +0800 Subject: [PATCH 110/322] fix: timeout in log truncation tests Reduces the number of entries in tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/storage/wal/tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/curp/src/server/storage/wal/tests.rs b/crates/curp/src/server/storage/wal/tests.rs index cbb942837..f0de669ba 100644 --- 
a/crates/curp/src/server/storage/wal/tests.rs +++ b/crates/curp/src/server/storage/wal/tests.rs @@ -26,7 +26,7 @@ fn simple_append_and_recovery_is_ok() { #[test] fn log_head_truncation_is_ok() { - for num_entries in 1..40 { + for num_entries in 1..10 { for truncate_at in 1..=num_entries { let wal_test_path = tempfile::tempdir().unwrap(); test_head_truncate_at(wal_test_path.path(), num_entries, truncate_at as u64); @@ -37,7 +37,7 @@ fn log_head_truncation_is_ok() { #[test] fn log_tail_truncation_is_ok() { - for num_entries in 1..40 { + for num_entries in 1..10 { for truncate_at in 1..=num_entries { let wal_test_path = tempfile::tempdir().unwrap(); test_tail_truncate_at(wal_test_path.path(), num_entries, truncate_at as u64); From 555978e31888114fa32ce469a2fd102c7fea79af Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 20:24:27 +0800 Subject: [PATCH 111/322] refactor: updates cluster state on error in keep alive Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/keep_alive.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 4ce744be8..7cac42f67 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -91,8 +91,10 @@ impl KeepAlive { } Err(e) => { warn!("keep alive failed: {e:?}"); - // Sleep for some time, the cluster state should be updated in a while - tokio::time::sleep(FAIL_SLEEP_DURATION).await; + if let Err(err) = cluster_state.fetch_and_update().await { + warn!("fetch cluster failed: {err:?}"); + tokio::time::sleep(FAIL_SLEEP_DURATION).await; + } } } } From 7e340f2ad448b401b1d130a6d63c75dd528089c8 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 20:28:24 +0800 Subject: [PATCH 112/322] fix: panic when tonic closes channel asynchronously Signed-off-by: bsbds 
<69835502+bsbds@users.noreply.github.com> --- crates/curp/src/rpc/connect.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/curp/src/rpc/connect.rs b/crates/curp/src/rpc/connect.rs index 89c1fc716..35d2065aa 100644 --- a/crates/curp/src/rpc/connect.rs +++ b/crates/curp/src/rpc/connect.rs @@ -81,9 +81,13 @@ fn connect_to( for addr in &addrs { let endpoint = build_endpoint(addr, tls_config.as_ref()) .unwrap_or_else(|_| unreachable!("address is ill-formatted")); - change_tx + if change_tx .try_send(tower::discover::Change::Insert(addr.clone(), endpoint)) - .unwrap_or_else(|_| unreachable!("unknown channel tx send error")); + .is_err() + { + // It seems that tonic would close the channel asynchronously + debug!("failed to update channel due to runtime closed"); + } } let client = Client::from_channel(channel); Connect { From 39d48ad0ece42a990fef7a5ffe6f6cfa1cddea23 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 5 Sep 2024 21:45:18 +0800 Subject: [PATCH 113/322] fix: set client inside xline to use raw curp addr Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/src/server/xline_server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/xline/src/server/xline_server.rs b/crates/xline/src/server/xline_server.rs index 655953c79..182c0c4ad 100644 --- a/crates/xline/src/server/xline_server.rs +++ b/crates/xline/src/server/xline_server.rs @@ -521,7 +521,7 @@ impl XlineServer { ); let client = Arc::new( - CurpClientBuilder::new(*self.cluster_config.client_config(), false) + CurpClientBuilder::new(*self.cluster_config.client_config(), true) .tls_config(self.client_tls_config.clone()) .cluster_version(self.cluster_info.cluster_version()) .all_members(self.cluster_info.all_members_peer_urls()) From 7a34aeab31bf5cc04640dae71569ef744c9cd7dd Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 6 Sep 2024 09:48:23 +0800 
Subject: [PATCH 114/322] refactor: exit keep alive task on cluster shutdown Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/keep_alive.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 7cac42f67..cdb52f9a4 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -73,8 +73,13 @@ impl KeepAlive { let handle = tokio::spawn(async move { loop { let fetch_result = cluster_state.ready_or_fetch().await; + // TODO: make the error handling code reusable let current_state = match fetch_result { Ok(ready) => ready, + Err(CurpError::ShuttingDown(())) => { + info!("cluster is shutting down, exiting keep alive task"); + return; + } Err(e) => { warn!("fetch cluster failed: {e:?}"); // Sleep for some time, the cluster state should be updated in a while @@ -89,6 +94,10 @@ impl KeepAlive { client_id.store(new_id, Ordering::Relaxed); let _ignore = update_event.notify(usize::MAX); } + Err(CurpError::ShuttingDown(())) => { + info!("cluster is shutting down, exiting keep alive task"); + return; + } Err(e) => { warn!("keep alive failed: {e:?}"); if let Err(err) = cluster_state.fetch_and_update().await { From 2513da7752e22dafb1f3f7cf38c798fdee112d1d Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 6 Sep 2024 10:32:57 +0800 Subject: [PATCH 115/322] fix: `wait_id_update` may lost event Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/keep_alive.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index cdb52f9a4..b0b5bdaf3 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -44,11 +44,12 @@ impl KeepAliveHandle { /// Wait for the client id pub(crate) async fn wait_id_update(&self, current_id: u64) -> u64 
{ loop { + let listen_update = self.update_event.listen(); let id = self.client_id.load(Ordering::Relaxed); if current_id != id { return id; } - self.update_event.listen().await; + listen_update.await; } } } From 6b9a889bacac3f9c2d2e599255dd3b55356bb0d0 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 6 Sep 2024 10:35:27 +0800 Subject: [PATCH 116/322] chore: remove unused files in client Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 8 - crates/curp/src/client/state.rs | 476 ------------------------------- crates/curp/src/client/stream.rs | 121 -------- 3 files changed, 605 deletions(-) delete mode 100644 crates/curp/src/client/state.rs delete mode 100644 crates/curp/src/client/stream.rs diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 0b9bf6f10..4ee11b3f9 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -8,18 +8,10 @@ mod metrics; /// Unary rpc client mod unary; -#[cfg(ignore)] -/// Stream rpc client -mod stream; - #[allow(unused)] /// Retry layer mod retry; -#[allow(unused)] -/// State for clients -mod state; - #[allow(unused)] /// State of the cluster mod cluster_state; diff --git a/crates/curp/src/client/state.rs b/crates/curp/src/client/state.rs deleted file mode 100644 index 39cc7fea9..000000000 --- a/crates/curp/src/client/state.rs +++ /dev/null @@ -1,476 +0,0 @@ -use std::{ - cmp::Ordering, - collections::{hash_map::Entry, HashMap, HashSet}, - sync::{atomic::AtomicU64, Arc}, - time::Duration, -}; - -use event_listener::Event; -use futures::{stream::FuturesUnordered, Future}; -use rand::seq::IteratorRandom; -use tokio::sync::RwLock; -#[cfg(not(madsim))] -use tonic::transport::ClientTlsConfig; -use tracing::{debug, info}; -#[cfg(madsim)] -use utils::ClientTlsConfig; - -use crate::{ - members::ServerId, - rpc::{ - self, - connect::{BypassedConnect, ConnectApi}, - CurpError, FetchClusterRequest, 
FetchClusterResponse, Protocol, - }, - server::StreamingProtocol, -}; - -/// The client state -#[derive(Debug)] -pub(super) struct State { - /// Mutable state - mutable: RwLock, - /// Immutable state - immutable: StateStatic, - /// The client id. Separated from `mutable` because the client ID will be updated in the background. - client_id: Arc, -} - -/// Immutable client state, could be cloned -#[derive(Debug, Clone)] -struct StateStatic { - /// is current client send request to raw curp server - is_raw_curp: bool, - /// Local server id, should be initialized on startup - local_server: Option, - /// Notifier of leader update - leader_notifier: Arc, - /// Client tls config - tls_config: Option, -} - -/// Mutable client state -struct StateMut { - /// Leader id. At the beginning, we may not know who the leader is. - leader: Option, - /// Term, initialize to 0, calibrated by the server. - term: u64, - /// Cluster version, initialize to 0, calibrated by the server. - cluster_version: u64, - /// Members' connect, calibrated by the server. 
- connects: HashMap>, -} - -impl std::fmt::Debug for StateMut { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("State") - .field("leader", &self.leader) - .field("term", &self.term) - .field("cluster_version", &self.cluster_version) - .field("connects", &self.connects.keys()) - .finish() - } -} - -impl State { - /// For test - #[cfg(test)] - pub(super) fn new_arc( - connects: HashMap>, - local_server: Option, - leader: Option, - term: u64, - cluster_version: u64, - tls_config: Option, - ) -> Arc { - Arc::new(Self { - mutable: RwLock::new(StateMut { - leader, - term, - cluster_version, - connects, - }), - immutable: StateStatic { - local_server, - leader_notifier: Arc::new(Event::new()), - tls_config, - is_raw_curp: true, - }, - // Sets the client id to non-zero to avoid waiting for client id in tests - client_id: Arc::new(AtomicU64::new(1)), - }) - } - - /// Get the leader notifier - pub(super) fn leader_notifier(&self) -> &Event { - &self.immutable.leader_notifier - } - - /// Clone a reference to client id - pub(super) fn clone_client_id(&self) -> Arc { - Arc::clone(&self.client_id) - } - - /// Get the client id - pub(super) fn client_id(&self) -> u64 { - self.client_id.load(std::sync::atomic::Ordering::Relaxed) - } - - /// Generate client id if it does not exist when it is the leader - pub(crate) async fn check_gen_local_client_id(&self) { - let local_server_id = self.immutable.local_server; - let leader_id = self.leader_id().await; - if local_server_id != leader_id { - return; - } - if self.client_id.load(std::sync::atomic::Ordering::Relaxed) == 0 { - let id = rand::random(); - self.client_id - .store(id, std::sync::atomic::Ordering::Relaxed); - info!("generate client id({id}) locally for bypassed client"); - } - } - - /// Choose a random server to try to refresh the state - /// Use when the current leader is missing. 
- pub(crate) async fn try_refresh_state(&self) -> Result<(), CurpError> { - /// The timeout for refreshing the state - const REFRESH_TIMEOUT: Duration = Duration::from_secs(1); - - let rand_conn = { - let state = self.mutable.read().await; - state - .connects - .values() - .choose(&mut rand::thread_rng()) - .map(Arc::clone) - .ok_or_else(CurpError::wrong_cluster_version)? - }; - let resp = rand_conn - .fetch_cluster(FetchClusterRequest::default(), REFRESH_TIMEOUT) - .await; - self.check_and_update(&resp?.into_inner()).await?; - Ok(()) - } - - /// Get the local server connection - pub(super) async fn local_connect(&self) -> Option> { - let id = self.immutable.local_server?; - self.mutable.read().await.connects.get(&id).map(Arc::clone) - } - - /// Get the local server id - pub(super) fn local_server_id(&self) -> Option { - self.immutable.local_server - } - - /// Get the cluster version - pub(super) async fn cluster_version(&self) -> u64 { - self.mutable.read().await.cluster_version - } - - /// Get the cached leader id - pub(super) async fn leader_id(&self) -> Option { - self.mutable.read().await.leader - } - - /// Get term of the cluster - pub(super) async fn term(&self) -> u64 { - self.mutable.read().await.term - } - - /// Take an async function and map to the dedicated server, return `Err(CurpError:WrongClusterVersion(()))` - /// if the server can not found in local state - pub(super) async fn map_server>>( - &self, - id: ServerId, - f: impl FnOnce(Arc) -> F, - ) -> Result { - let conn = { - // If the leader id cannot be found in connects, it indicates that there is - // an inconsistency between the client's local leader state and the cluster - // state, then mock a `WrongClusterVersion` return to the outside. - self.mutable - .read() - .await - .connects - .get(&id) - .map(Arc::clone) - .ok_or_else(CurpError::wrong_cluster_version)? 
- }; - f(conn).await - } - - /// Returns the number of members in the cluster - pub(super) async fn connects_len(&self) -> usize { - self.mutable.read().await.connects.len() - } - - /// Take an async function and map to all server, returning `FuturesUnordered` - pub(super) async fn for_each_server>( - &self, - f: impl FnMut(Arc) -> F, - ) -> FuturesUnordered { - self.mutable - .read() - .await - .connects - .values() - .map(Arc::clone) - .map(f) - .collect() - } - - /// Take an async function and map to all server, returning `FuturesUnordered` - pub(super) async fn for_each_follower>( - &self, - leader_id: u64, - f: impl FnMut(Arc) -> F, - ) -> FuturesUnordered { - let mutable_r = self.mutable.read().await; - mutable_r - .connects - .iter() - .filter_map(|(id, conn)| (*id != leader_id).then_some(conn)) - .map(Arc::clone) - .map(f) - .collect() - } - - /// Inner check and update leader - fn check_and_update_leader_inner( - &self, - state: &mut StateMut, - leader_id: Option, - term: u64, - ) -> bool { - match state.term.cmp(&term) { - Ordering::Less => { - // reset term only when the resp has leader id to prevent: - // If a server loses contact with its leader, it will update its term for election. Since other servers are all right, the election will not succeed. - // But if the client learns about the new term and updates its term to it, it will never get the true leader. 
- if let Some(new_leader_id) = leader_id { - info!("client term updates to {term}, client leader id updates to {new_leader_id}"); - state.term = term; - state.leader = Some(new_leader_id); - let _ignore = self.immutable.leader_notifier.notify(usize::MAX); - } - } - Ordering::Equal => { - if let Some(new_leader_id) = leader_id { - if state.leader.is_none() { - info!("client leader id updates to {new_leader_id}"); - state.leader = Some(new_leader_id); - let _ignore = self.immutable.leader_notifier.notify(usize::MAX); - } - assert_eq!( - state.leader, - Some(new_leader_id), - "there should never be two leader in one term" - ); - } - } - Ordering::Greater => { - debug!("ignore old term({}) from server", term); - return false; - } - } - true - } - - /// Update leader - pub(super) async fn check_and_update_leader( - &self, - leader_id: Option, - term: u64, - ) -> bool { - let mut state = self.mutable.write().await; - self.check_and_update_leader_inner(&mut state, leader_id, term) - } - - /// Update client state based on [`FetchClusterResponse`] - pub(super) async fn check_and_update( - &self, - res: &FetchClusterResponse, - ) -> Result<(), tonic::transport::Error> { - let mut state = self.mutable.write().await; - if !self.check_and_update_leader_inner( - &mut state, - res.leader_id.as_ref().map(Into::into), - res.term, - ) { - return Ok(()); - } - if state.cluster_version == res.cluster_version { - debug!( - "ignore cluster version({}) from server", - res.cluster_version - ); - return Ok(()); - } - - info!("client cluster version updated to {}", res.cluster_version); - state.cluster_version = res.cluster_version; - - let mut new_members = if self.immutable.is_raw_curp { - res.clone().into_peer_urls() - } else { - res.clone().into_client_urls() - }; - let old_ids = state.connects.keys().copied().collect::>(); - let new_ids = new_members.keys().copied().collect::>(); - - let diffs = &old_ids ^ &new_ids; - let sames = &old_ids & &new_ids; - - for diff in diffs { - if let 
Entry::Vacant(e) = state.connects.entry(diff) { - let addrs = new_members - .remove(&diff) - .unwrap_or_else(|| unreachable!("{diff} must in new member addrs")); - debug!("client connects to a new server({diff}), address({addrs:?})"); - let new_conn = rpc::connect(diff, addrs, self.immutable.tls_config.clone()); - let _ig = e.insert(new_conn); - } else { - debug!("client removes old server({diff})"); - let _ig = state.connects.remove(&diff); - } - } - for same in sames { - let conn = state - .connects - .get(&same) - .unwrap_or_else(|| unreachable!("{same} must in old connects")); - let addrs = new_members - .remove(&same) - .unwrap_or_else(|| unreachable!("{same} must in new member addrs")); - conn.update_addrs(addrs).await?; - } - - Ok(()) - } - - /// Wait for client id - pub(super) async fn wait_for_client_id(&self) -> Result { - /// Max retry count for waiting for a client ID - /// - /// TODO: This retry count is set relatively high to avoid test cluster startup timeouts. - /// We should consider setting this to a more reasonable value. 
- const RETRY_COUNT: usize = 30; - /// The interval for each retry - const RETRY_INTERVAL: Duration = Duration::from_secs(1); - - for _ in 0..RETRY_COUNT { - let client_id = self.client_id(); - if client_id != 0 { - return Ok(client_id); - } - debug!("waiting for client_id"); - tokio::time::sleep(RETRY_INTERVAL).await; - } - - Err(tonic::Status::deadline_exceeded( - "timeout waiting for client id", - )) - } -} - -/// Builder for state -#[derive(Debug, Clone)] -pub(super) struct StateBuilder { - /// All members (required) - all_members: HashMap>, - /// Initial leader state (optional) - leader_state: Option<(ServerId, u64)>, - /// Initial cluster version (optional) - cluster_version: Option, - /// Client Tls config - tls_config: Option, - /// is current client send request to raw curp server - is_raw_curp: bool, -} - -impl StateBuilder { - /// Create a state builder - pub(super) fn new( - all_members: HashMap>, - tls_config: Option, - ) -> Self { - Self { - all_members, - leader_state: None, - cluster_version: None, - tls_config, - is_raw_curp: false, - } - } - - /// Set is raw curp - pub(super) fn set_is_raw_curp(&mut self, is_raw_curp: bool) { - self.is_raw_curp = is_raw_curp; - } - - /// Set the leader state (optional) - pub(super) fn set_leader_state(&mut self, id: ServerId, term: u64) { - self.leader_state = Some((id, term)); - } - - /// Set the cluster version (optional) - pub(super) fn set_cluster_version(&mut self, cluster_version: u64) { - self.cluster_version = Some(cluster_version); - } - - /// Build the state with local server - pub(super) fn build_bypassed( - mut self, - local_server_id: ServerId, - local_server: P, - ) -> State { - debug!("client bypassed server({local_server_id})"); - - let _ig = self.all_members.remove(&local_server_id); - let mut connects: HashMap<_, _> = - rpc::connects(self.all_members.clone(), self.tls_config.as_ref()).collect(); - let __ig = connects.insert( - local_server_id, - Arc::new(BypassedConnect::new(local_server_id, 
local_server)), - ); - - State { - mutable: RwLock::new(StateMut { - leader: self.leader_state.map(|state| state.0), - term: self.leader_state.map_or(0, |state| state.1), - cluster_version: self.cluster_version.unwrap_or_default(), - connects, - }), - immutable: StateStatic { - local_server: Some(local_server_id), - leader_notifier: Arc::new(Event::new()), - tls_config: self.tls_config.take(), - is_raw_curp: self.is_raw_curp, - }, - client_id: Arc::new(AtomicU64::new(0)), - } - } - - /// Build the state - pub(super) fn build(self) -> State { - let connects: HashMap<_, _> = - rpc::connects(self.all_members.clone(), self.tls_config.as_ref()).collect(); - State { - mutable: RwLock::new(StateMut { - leader: self.leader_state.map(|state| state.0), - term: self.leader_state.map_or(0, |state| state.1), - cluster_version: self.cluster_version.unwrap_or_default(), - connects, - }), - immutable: StateStatic { - local_server: None, - leader_notifier: Arc::new(Event::new()), - tls_config: self.tls_config, - is_raw_curp: self.is_raw_curp, - }, - client_id: Arc::new(AtomicU64::new(0)), - } - } -} diff --git a/crates/curp/src/client/stream.rs b/crates/curp/src/client/stream.rs deleted file mode 100644 index 9ebeb1599..000000000 --- a/crates/curp/src/client/stream.rs +++ /dev/null @@ -1,121 +0,0 @@ -use std::{sync::Arc, time::Duration}; - -use futures::Future; -use tracing::{debug, info, warn}; - -use super::state::State; -use crate::rpc::{connect::ConnectApi, CurpError, Redirect}; - -/// Stream client config -#[derive(Debug)] -pub(super) struct StreamingConfig { - /// Heartbeat interval - heartbeat_interval: Duration, -} - -impl StreamingConfig { - /// Create a stream client config - pub(super) fn new(heartbeat_interval: Duration) -> Self { - Self { heartbeat_interval } - } -} - -/// Stream client -#[derive(Debug)] -pub(super) struct Streaming { - /// Shared client state - pub(super) state: Arc, - /// Stream client config - config: StreamingConfig, -} - -/// Prevent lock 
contention when leader crashed or some unknown errors -const RETRY_DELAY: Duration = Duration::from_millis(100); - -impl Streaming { - /// Create a stream client - pub(super) fn new(state: Arc, config: StreamingConfig) -> Self { - Self { state, config } - } - - /// Take an async function and map to the remote leader, hang up when no leader found or - /// the leader is itself. - async fn map_remote_leader>>( - &self, - f: impl FnOnce(Arc) -> F, - ) -> Result { - loop { - let Some(leader_id) = self.state.leader_id().await else { - warn!("cannot find leader_id, refreshing state..."); - let _ig = self.state.try_refresh_state().await; - tokio::time::sleep(RETRY_DELAY).await; - continue; - }; - if let Some(local_id) = self.state.local_server_id() { - if leader_id == local_id { - self.state.check_gen_local_client_id().await; - debug!("skip keep heartbeat for local connection, wait for leadership update"); - self.state.leader_notifier().listen().await; - continue; - } - } - return self.state.map_server(leader_id, f).await; - } - } - - /// Keep heartbeat - pub(super) async fn keep_heartbeat(&self) { - #[allow(clippy::ignored_unit_patterns)] // tokio select internal triggered - loop { - let heartbeat = self.map_remote_leader::<(), _>(|conn| async move { - loop { - let err = conn - .lease_keep_alive( - self.state.clone_client_id(), - self.config.heartbeat_interval, - ) - .await; - #[allow(clippy::wildcard_enum_match_arm)] - match err { - CurpError::Redirect(Redirect { leader_id, term }) => { - let _ig = self - .state - .check_and_update_leader(leader_id.map(Into::into), term) - .await; - } - CurpError::WrongClusterVersion(()) => { - warn!( - "cannot find the leader in connects, wait for leadership update" - ); - self.state.leader_notifier().listen().await; - } - CurpError::RpcTransport(()) => { - warn!( - "got rpc transport error when keep heartbeat, refreshing state..." 
- ); - let _ig = self.state.try_refresh_state().await; - tokio::time::sleep(RETRY_DELAY).await; - } - CurpError::ShuttingDown(()) => { - info!("cluster is shutting down, exiting heartbeat task"); - return Ok(()); - } - _ => { - warn!("got unexpected error {err:?} when keep heartbeat, retrying..."); - tokio::time::sleep(RETRY_DELAY).await; - } - } - } - }); - - tokio::select! { - _ = self.state.leader_notifier().listen() => { - debug!("interrupt keep heartbeat because leadership changed"); - } - _ = heartbeat => { - break; - } - } - } - } -} From 4b11fc448a8958d2447dbe686cff35fe23dc26f4 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 6 Sep 2024 13:04:08 +0800 Subject: [PATCH 117/322] refactor: pass `ClusterState` to Retry::new Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 9 ++++----- crates/curp/src/client/retry.rs | 7 ++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 4ee11b3f9..3b47cb281 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -28,9 +28,8 @@ mod config; /// Lease keep alive implementation mod keep_alive; -// TODO: rewrite these tests /// Tests for client -#[cfg(ignore)] +#[cfg(test)] mod tests; #[cfg(madsim)] @@ -49,7 +48,7 @@ use utils::ClientTlsConfig; use utils::{build_endpoint, config::ClientConfig}; use self::{ - cluster_state::ClusterStateInit, + cluster_state::{ClusterState, ClusterStateInit}, config::Config, fetch::Fetch, keep_alive::KeepAlive, @@ -453,7 +452,7 @@ impl ClientBuilder { self.init_retry_config(), keep_alive, fetch, - cluster_state_init, + ClusterState::Init(cluster_state_init), ); Ok(client) @@ -516,7 +515,7 @@ impl ClientBuilderWithBypass

{ self.inner.init_retry_config(), keep_alive, fetch, - cluster_state_init, + ClusterState::Init(cluster_state_init), ); Ok(client) diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 74c0d193b..fcf227907 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -243,12 +243,9 @@ where retry_config: RetryConfig, keep_alive: KeepAlive, fetch: Fetch, - cluster_state_init: ClusterStateInit, + cluster_state: ClusterState, ) -> Self { - let cluster_state = Arc::new(ClusterStateShared::new( - ClusterState::Init(cluster_state_init), - fetch.clone(), - )); + let cluster_state = Arc::new(ClusterStateShared::new(cluster_state, fetch.clone())); let keep_alive_handle = keep_alive.spawn_keep_alive(Arc::clone(&cluster_state)); Self { inner, From 0355f3f564fe8bc48f444a554f0e6ed18dcf2b80 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 6 Sep 2024 15:35:16 +0800 Subject: [PATCH 118/322] test: rewrite curp client tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/fetch.rs | 197 +++++++++++++++++++++++ crates/curp/src/client/tests.rs | 273 ++++++-------------------------- 2 files changed, 245 insertions(+), 225 deletions(-) diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index c0727416c..650e632c9 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -165,3 +165,200 @@ impl std::fmt::Debug for Fetch { .finish() } } + +#[cfg(test)] +mod test { + use std::{collections::HashMap, sync::Arc, time::Duration}; + + use futures::stream::FuturesUnordered; + use tracing_test::traced_test; + + use crate::{ + client::{cluster_state::ForEachServer, config::Config, tests::init_mocked_connects}, + rpc::{connect::ConnectApi, CurpError, FetchClusterResponse, Member}, + }; + + use super::Fetch; + + impl ForEachServer for HashMap> { + fn for_each_server>( + &self, + f: impl FnMut(Arc) -> F, + ) -> 
FuturesUnordered { + self.values().cloned().map(f).collect() + } + } + + /// Create unary client for test + fn init_fetch() -> Fetch { + Fetch::new(Config::new( + None, + None, + Duration::from_secs(1), + Duration::from_secs(1), + true, + )) + } + + #[traced_test] + #[tokio::test(flavor = "multi_thread")] + async fn test_unary_fetch_clusters_serializable() { + let connects = init_mocked_connects(3, |_id, conn| { + conn.expect_fetch_cluster().returning(|_req, _timeout| { + Ok(tonic::Response::new(FetchClusterResponse { + leader_id: Some(0.into()), + term: 1, + cluster_id: 123, + members: vec![ + Member::new(0, "S0", vec!["A0".to_owned()], [], false), + Member::new(1, "S1", vec!["A1".to_owned()], [], false), + Member::new(2, "S2", vec!["A2".to_owned()], [], false), + ], + cluster_version: 1, + })) + }); + }); + let fetch = init_fetch(); + let (_, res) = fetch.fetch_cluster(connects).await.unwrap(); + assert_eq!( + res.into_peer_urls(), + HashMap::from([ + (0, vec!["A0".to_owned()]), + (1, vec!["A1".to_owned()]), + (2, vec!["A2".to_owned()]) + ]) + ); + } + + #[traced_test] + #[tokio::test] + async fn test_unary_fetch_clusters_linearizable() { + let connects = init_mocked_connects(5, |id, conn| { + conn.expect_fetch_cluster() + .return_once(move |_req, _timeout| { + let resp = match id { + 0 => FetchClusterResponse { + leader_id: Some(0.into()), + term: 2, + cluster_id: 123, + members: vec![ + Member::new(0, "S0", vec!["A0".to_owned()], [], false), + Member::new(1, "S1", vec!["A1".to_owned()], [], false), + Member::new(2, "S2", vec!["A2".to_owned()], [], false), + Member::new(3, "S3", vec!["A3".to_owned()], [], false), + Member::new(4, "S4", vec!["A4".to_owned()], [], false), + ], + cluster_version: 1, + }, + 1 | 4 => FetchClusterResponse { + leader_id: Some(0.into()), + term: 2, + cluster_id: 123, + members: vec![], // linearizable read from follower returns empty members + cluster_version: 1, + }, + 2 => FetchClusterResponse { + leader_id: None, + term: 23, // 
abnormal term + cluster_id: 123, + members: vec![], + cluster_version: 1, + }, + 3 => FetchClusterResponse { + leader_id: Some(3.into()), // imagine this node is a old leader + term: 1, // with the old term + cluster_id: 123, + members: vec![ + Member::new(0, "S0", vec!["B0".to_owned()], [], false), + Member::new(1, "S1", vec!["B1".to_owned()], [], false), + Member::new(2, "S2", vec!["B2".to_owned()], [], false), + Member::new(3, "S3", vec!["B3".to_owned()], [], false), + Member::new(4, "S4", vec!["B4".to_owned()], [], false), + ], + cluster_version: 1, + }, + _ => unreachable!("there are only 5 nodes"), + }; + Ok(tonic::Response::new(resp)) + }); + }); + let fetch = init_fetch(); + let (_, res) = fetch.fetch_cluster(connects).await.unwrap(); + assert_eq!( + res.into_peer_urls(), + HashMap::from([ + (0, vec!["A0".to_owned()]), + (1, vec!["A1".to_owned()]), + (2, vec!["A2".to_owned()]), + (3, vec!["A3".to_owned()]), + (4, vec!["A4".to_owned()]) + ]) + ); + } + + #[traced_test] + #[tokio::test] + async fn test_unary_fetch_clusters_linearizable_failed() { + let connects = init_mocked_connects(5, |id, conn| { + conn.expect_fetch_cluster() + .return_once(move |_req, _timeout| { + let resp = match id { + 0 => FetchClusterResponse { + leader_id: Some(0.into()), + term: 2, + cluster_id: 123, + members: vec![ + Member::new(0, "S0", vec!["A0".to_owned()], [], false), + Member::new(1, "S1", vec!["A1".to_owned()], [], false), + Member::new(2, "S2", vec!["A2".to_owned()], [], false), + Member::new(3, "S3", vec!["A3".to_owned()], [], false), + Member::new(4, "S4", vec!["A4".to_owned()], [], false), + ], + cluster_version: 1, + }, + 1 => FetchClusterResponse { + leader_id: Some(0.into()), + term: 2, + cluster_id: 123, + members: vec![], // linearizable read from follower returns empty members + cluster_version: 1, + }, + 2 => FetchClusterResponse { + leader_id: None, // imagine this node is a disconnected candidate + term: 23, // with a high term + cluster_id: 123, + members: 
vec![], + cluster_version: 1, + }, + 3 => FetchClusterResponse { + leader_id: Some(3.into()), // imagine this node is a old leader + term: 1, // with the old term + cluster_id: 123, + members: vec![ + Member::new(0, "S0", vec!["B0".to_owned()], [], false), + Member::new(1, "S1", vec!["B1".to_owned()], [], false), + Member::new(2, "S2", vec!["B2".to_owned()], [], false), + Member::new(3, "S3", vec!["B3".to_owned()], [], false), + Member::new(4, "S4", vec!["B4".to_owned()], [], false), + ], + cluster_version: 1, + }, + 4 => FetchClusterResponse { + leader_id: Some(3.into()), // imagine this node is a old follower of old leader(3) + term: 1, // with the old term + cluster_id: 123, + members: vec![], + cluster_version: 1, + }, + _ => unreachable!("there are only 5 nodes"), + }; + Ok(tonic::Response::new(resp)) + }); + }); + let fetch = init_fetch(); + let err = fetch.fetch_cluster(connects).await.unwrap_err(); + // only server(0, 1)'s responses are valid, less than majority quorum(3), got a + // mocked RpcTransport to retry + assert_eq!(err, CurpError::RpcTransport(())); + } +} diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 1ec9b7971..bc6045cba 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -11,28 +11,26 @@ use tracing_test::traced_test; #[cfg(madsim)] use utils::ClientTlsConfig; -use super::{ - state::State, - unary::{Unary, UnaryConfig}, -}; +use super::{cluster_state::ClusterState, config::Config, unary::Unary}; use crate::{ client::{ + cluster_state::ClusterStateReady, fetch::Fetch, keep_alive::KeepAlive, - retry::{Retry, RetryConfig}, - ClientApi, + retry::{Context, Retry, RetryConfig}, + ClientApi, RepeatableClientApi, }, members::ServerId, rpc::{ connect::{ConnectApi, MockConnectApi}, - CurpError, FetchClusterResponse, Member, OpResponse, ProposeResponse, ReadIndexResponse, - RecordResponse, ResponseOp, SyncedResponse, + CurpError, FetchClusterResponse, Member, OpResponse, ProposeId, 
ProposeResponse, + ReadIndexResponse, RecordResponse, ResponseOp, SyncedResponse, }, }; /// Create a mocked connects with server id from 0~size #[allow(trivial_casts)] // Trait object with high ranked type inferences failed, cast manually -fn init_mocked_connects( +pub(super) fn init_mocked_connects( size: usize, f: impl Fn(usize, &mut MockConnectApi), ) -> HashMap> { @@ -50,217 +48,20 @@ fn init_mocked_connects( /// Create unary client for test fn init_unary_client( - connects: HashMap>, local_server: Option, - leader: Option, - term: u64, - cluster_version: u64, tls_config: Option, ) -> Unary { - let state = State::new_arc( - connects, + Unary::new(Config::new( local_server, - leader, - term, - cluster_version, tls_config, - ); - Unary::new( - state, - UnaryConfig::new(Duration::from_secs(0), Duration::from_secs(0)), - ) + Duration::from_secs(0), + Duration::from_secs(0), + false, + )) } // Tests for unary client -#[traced_test] -#[tokio::test] -async fn test_unary_fetch_clusters_serializable() { - let connects = init_mocked_connects(3, |_id, conn| { - conn.expect_fetch_cluster().return_once(|_req, _timeout| { - Ok(tonic::Response::new(FetchClusterResponse { - leader_id: Some(0.into()), - term: 1, - cluster_id: 123, - members: vec![ - Member::new(0, "S0", vec!["A0".to_owned()], [], false), - Member::new(1, "S1", vec!["A1".to_owned()], [], false), - Member::new(2, "S2", vec!["A2".to_owned()], [], false), - ], - cluster_version: 1, - })) - }); - }); - let unary = init_unary_client(connects, None, None, 0, 0, None); - let res = unary.fetch_cluster(false).await.unwrap(); - assert_eq!( - res.into_peer_urls(), - HashMap::from([ - (0, vec!["A0".to_owned()]), - (1, vec!["A1".to_owned()]), - (2, vec!["A2".to_owned()]) - ]) - ); -} - -#[traced_test] -#[tokio::test] -async fn test_unary_fetch_clusters_serializable_local_first() { - let connects = init_mocked_connects(3, |id, conn| { - conn.expect_fetch_cluster() - .return_once(move |_req, _timeout| { - let members = if id 
== 1 { - // local server(1) does not see the cluster members - vec![] - } else { - panic!("other server's `fetch_cluster` should not be invoked"); - }; - Ok(tonic::Response::new(FetchClusterResponse { - leader_id: Some(0.into()), - term: 1, - cluster_id: 123, - members, - cluster_version: 1, - })) - }); - }); - let unary = init_unary_client(connects, Some(1), None, 0, 0, None); - let res = unary.fetch_cluster(false).await.unwrap(); - assert!(res.members.is_empty()); -} - -#[traced_test] -#[tokio::test] -async fn test_unary_fetch_clusters_linearizable() { - let connects = init_mocked_connects(5, |id, conn| { - conn.expect_fetch_cluster() - .return_once(move |_req, _timeout| { - let resp = match id { - 0 => FetchClusterResponse { - leader_id: Some(0.into()), - term: 2, - cluster_id: 123, - members: vec![ - Member::new(0, "S0", vec!["A0".to_owned()], [], false), - Member::new(1, "S1", vec!["A1".to_owned()], [], false), - Member::new(2, "S2", vec!["A2".to_owned()], [], false), - Member::new(3, "S3", vec!["A3".to_owned()], [], false), - Member::new(4, "S4", vec!["A4".to_owned()], [], false), - ], - cluster_version: 1, - }, - 1 | 4 => FetchClusterResponse { - leader_id: Some(0.into()), - term: 2, - cluster_id: 123, - members: vec![], // linearizable read from follower returns empty members - cluster_version: 1, - }, - 2 => FetchClusterResponse { - leader_id: None, - term: 23, // abnormal term - cluster_id: 123, - members: vec![], - cluster_version: 1, - }, - 3 => FetchClusterResponse { - leader_id: Some(3.into()), // imagine this node is a old leader - term: 1, // with the old term - cluster_id: 123, - members: vec![ - Member::new(0, "S0", vec!["B0".to_owned()], [], false), - Member::new(1, "S1", vec!["B1".to_owned()], [], false), - Member::new(2, "S2", vec!["B2".to_owned()], [], false), - Member::new(3, "S3", vec!["B3".to_owned()], [], false), - Member::new(4, "S4", vec!["B4".to_owned()], [], false), - ], - cluster_version: 1, - }, - _ => unreachable!("there are only 5 
nodes"), - }; - Ok(tonic::Response::new(resp)) - }); - }); - let unary = init_unary_client(connects, None, None, 0, 0, None); - let res = unary.fetch_cluster(true).await.unwrap(); - assert_eq!( - res.into_peer_urls(), - HashMap::from([ - (0, vec!["A0".to_owned()]), - (1, vec!["A1".to_owned()]), - (2, vec!["A2".to_owned()]), - (3, vec!["A3".to_owned()]), - (4, vec!["A4".to_owned()]) - ]) - ); -} - -#[traced_test] -#[tokio::test] -async fn test_unary_fetch_clusters_linearizable_failed() { - let connects = init_mocked_connects(5, |id, conn| { - conn.expect_fetch_cluster() - .return_once(move |_req, _timeout| { - let resp = match id { - 0 => FetchClusterResponse { - leader_id: Some(0.into()), - term: 2, - cluster_id: 123, - members: vec![ - Member::new(0, "S0", vec!["A0".to_owned()], [], false), - Member::new(1, "S1", vec!["A1".to_owned()], [], false), - Member::new(2, "S2", vec!["A2".to_owned()], [], false), - Member::new(3, "S3", vec!["A3".to_owned()], [], false), - Member::new(4, "S4", vec!["A4".to_owned()], [], false), - ], - cluster_version: 1, - }, - 1 => FetchClusterResponse { - leader_id: Some(0.into()), - term: 2, - cluster_id: 123, - members: vec![], // linearizable read from follower returns empty members - cluster_version: 1, - }, - 2 => FetchClusterResponse { - leader_id: None, // imagine this node is a disconnected candidate - term: 23, // with a high term - cluster_id: 123, - members: vec![], - cluster_version: 1, - }, - 3 => FetchClusterResponse { - leader_id: Some(3.into()), // imagine this node is a old leader - term: 1, // with the old term - cluster_id: 123, - members: vec![ - Member::new(0, "S0", vec!["B0".to_owned()], [], false), - Member::new(1, "S1", vec!["B1".to_owned()], [], false), - Member::new(2, "S2", vec!["B2".to_owned()], [], false), - Member::new(3, "S3", vec!["B3".to_owned()], [], false), - Member::new(4, "S4", vec!["B4".to_owned()], [], false), - ], - cluster_version: 1, - }, - 4 => FetchClusterResponse { - leader_id: Some(3.into()), 
// imagine this node is a old follower of old leader(3) - term: 1, // with the old term - cluster_id: 123, - members: vec![], - cluster_version: 1, - }, - _ => unreachable!("there are only 5 nodes"), - }; - Ok(tonic::Response::new(resp)) - }); - }); - let unary = init_unary_client(connects, None, None, 0, 0, None); - let res = unary.fetch_cluster(true).await.unwrap_err(); - // only server(0, 1)'s responses are valid, less than majority quorum(3), got a - // mocked RpcTransport to retry - assert_eq!(res, CurpError::RpcTransport(())); -} - fn build_propose_response(conflict: bool) -> OpResponse { let resp = ResponseOp::Propose(ProposeResponse::new_result::( &Ok(TestCommandResult::default()), @@ -304,9 +105,11 @@ async fn test_unary_propose_fast_path_works() { Ok(tonic::Response::new(resp)) }); }); - let unary = init_unary_client(connects, None, Some(0), 1, 0, None); + let unary = init_unary_client(None, None); + let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let ctx = Context::new(ProposeId::default(), 0, cluster_state); let res = unary - .propose(&TestCommand::new_put(vec![1], 1), None, true) + .propose(&TestCommand::new_put(vec![1], 1), None, true, ctx) .await .unwrap() .unwrap(); @@ -338,10 +141,12 @@ async fn test_unary_propose_slow_path_works() { }); }); - let unary = init_unary_client(connects, None, Some(0), 1, 0, None); + let unary = init_unary_client(None, None); + let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let ctx = Context::new(ProposeId::default(), 0, cluster_state); let start_at = Instant::now(); let res = unary - .propose(&TestCommand::new_put(vec![1], 1), None, false) + .propose(&TestCommand::new_put(vec![1], 1), None, false, ctx) .await .unwrap() .unwrap(); @@ -381,10 +186,13 @@ async fn test_unary_propose_fast_path_fallback_slow_path() { Ok(tonic::Response::new(resp)) }); }); - let unary = init_unary_client(connects, None, Some(0), 1, 0, None); + + let unary = init_unary_client(None, None); + let cluster_state 
= ClusterStateReady::new(0, 1, 0, connects); + let ctx = Context::new(ProposeId::default(), 0, cluster_state); let start_at = Instant::now(); let res = unary - .propose(&TestCommand::new_put(vec![1], 1), None, true) + .propose(&TestCommand::new_put(vec![1], 1), None, true, ctx) .await .unwrap() .unwrap(); @@ -427,9 +235,12 @@ async fn test_unary_propose_return_early_err() { conn.expect_record() .return_once(move |_req, _timeout| Err(err)); }); - let unary = init_unary_client(connects, None, Some(0), 1, 0, None); + + let unary = init_unary_client(None, None); + let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let ctx = Context::new(ProposeId::default(), 0, cluster_state); let err = unary - .propose(&TestCommand::new_put(vec![1], 1), None, true) + .propose(&TestCommand::new_put(vec![1], 1), None, true, ctx) .await .unwrap_err(); assert_eq!(err, early_err); @@ -464,12 +275,15 @@ async fn test_retry_propose_return_no_retry_error() { conn.expect_record() .return_once(move |_req, _timeout| Err(err)); }); - let unary = init_unary_client(connects, None, Some(0), 1, 0, None); + + let unary = init_unary_client(None, None); + let cluster_state = ClusterStateReady::new(0, 1, 0, connects); let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(100), 5), KeepAlive::new(Duration::from_secs(1)), Fetch::default(), + ClusterState::Ready(cluster_state), ); let err = retry .propose(&TestCommand::new_put(vec![1], 1), None, false) @@ -515,12 +329,14 @@ async fn test_retry_propose_return_retry_error() { conn.expect_record() .returning(move |_req, _timeout| Err(err.clone())); }); - let unary = init_unary_client(connects, None, Some(0), 1, 0, None); + let unary = init_unary_client(None, None); + let cluster_state = ClusterStateReady::new(0, 1, 0, connects); let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(10), 5), KeepAlive::new(Duration::from_secs(1)), Fetch::default(), + ClusterState::Ready(cluster_state), ); let err = retry 
.propose(&TestCommand::new_put(vec![1], 1), None, false) @@ -555,9 +371,12 @@ async fn test_read_index_success() { Ok(tonic::Response::new(resp)) }); }); - let unary = init_unary_client(connects, None, Some(0), 1, 0, None); + + let unary = init_unary_client(None, None); + let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let ctx = Context::new(ProposeId::default(), 0, cluster_state); let res = unary - .propose(&TestCommand::default(), None, true) + .propose(&TestCommand::default(), None, true, ctx) .await .unwrap() .unwrap(); @@ -588,8 +407,12 @@ async fn test_read_index_fail() { Ok(tonic::Response::new(resp)) }); }); - let unary = init_unary_client(connects, None, Some(0), 1, 0, None); - let res = unary.propose(&TestCommand::default(), None, true).await; + let unary = init_unary_client(None, None); + let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let ctx = Context::new(ProposeId::default(), 0, cluster_state); + let res = unary + .propose(&TestCommand::default(), None, true, ctx) + .await; assert!(res.is_err()); } From 869f48b09d9c9d162fa3b62e6520707c81dc884b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 6 Sep 2024 17:42:30 +0800 Subject: [PATCH 119/322] refactor: use external closure to build command for `Fetch` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/fetch.rs | 113 +++++++++++++------------------- crates/curp/src/client/mod.rs | 33 +++++++++- 2 files changed, 76 insertions(+), 70 deletions(-) diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 650e632c9..4bd24d56d 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -15,31 +15,54 @@ use crate::{ use super::cluster_state::{ClusterState, ClusterStateReady, ForEachServer}; use super::config::Config; -/// An override connect -type OverrideConnect = (u64, Arc); +/// Connect to cluster +/// +/// This is used to build a boxed 
closure that handles the `FetchClusterResponse` and returns +/// new connections. +pub(super) trait ConnectToCluster: + Fn(&FetchClusterResponse) -> HashMap> + Send + Sync + 'static +{ + /// Clone the value + fn clone_box(&self) -> Box; +} + +impl ConnectToCluster for T +where + T: Fn(&FetchClusterResponse) -> HashMap> + + Clone + + Send + + Sync + + 'static, +{ + fn clone_box(&self) -> Box { + Box::new(self.clone()) + } +} /// Fetch cluster implementation -#[derive(Default, Clone)] pub(crate) struct Fetch { - /// The fetch config - config: Config, - /// Override connect - override_connects: Vec, + /// The fetch timeout + timeout: Duration, + /// Connect to the given fetch cluster response + connect_to: Box, } -impl Fetch { - /// Creates a new `Fetch` - pub(crate) fn new(config: Config) -> Self { +impl Clone for Fetch { + fn clone(&self) -> Self { Self { - config, - override_connects: Vec::new(), + timeout: self.timeout, + connect_to: self.connect_to.clone_box(), } } +} - /// Add an override connect to fetch cluster response - pub(crate) fn with_override(mut self, connect: OverrideConnect) -> Self { - self.override_connects.push(connect); - self +impl Fetch { + /// Creates a new `Fetch` + pub(crate) fn new(timeout: Duration, connect_to: C) -> Self { + Self { + timeout, + connect_to: Box::new(connect_to), + } } /// Fetch cluster and updates the current state @@ -54,9 +77,10 @@ impl Fetch { .pre_fetch(&state) .await .ok_or(CurpError::internal("cluster not available"))?; - let new_members = self.member_addrs(&resp); - let new_connects = self.connect_to(new_members); - let new_connects = self.override_connects(new_connects); + let new_connects = (self.connect_to)(&resp); + //let new_members = self.member_addrs(&resp); + //let new_connects = self.connect_to(new_members); + //let new_connects = self.override_connects(new_connects); let new_state = ClusterStateReady::new( resp.leader_id .unwrap_or_else(|| unreachable!("leader id should be Some")) @@ -75,7 +99,7 @@ impl 
Fetch { /// Fetch the term of the cluster. This ensures that the current leader is the latest. async fn fetch_term(&self, state: &ClusterStateReady) -> bool { - let timeout = self.config.wait_synced_timeout(); + let timeout = self.timeout; let term = state.term(); let quorum = state.get_quorum(quorum); state @@ -95,7 +119,7 @@ impl Fetch { /// Prefetch, send fetch cluster request to the cluster and get the /// config with the greatest quorum. async fn pre_fetch(&self, state: &impl ForEachServer) -> Option { - let timeout = self.config.wait_synced_timeout(); + let timeout = self.timeout; let requests = state.for_each_server(|c| async move { c.fetch_cluster(FetchClusterRequest { linearizable: true }, timeout) .await @@ -111,57 +135,12 @@ impl Fetch { .filter(|resp| !resp.members.is_empty()) .max_by(|x, y| x.term.cmp(&y.term)) } - - /// Gets the member addresses to connect to - fn member_addrs(&self, resp: &FetchClusterResponse) -> HashMap> { - if self.config.is_raw_curp() { - resp.clone().into_peer_urls() - } else { - resp.clone().into_client_urls() - } - } - - /// Connect to the given addrs - fn connect_to( - &self, - new_members: HashMap>, - ) -> HashMap> { - new_members - .into_iter() - .map(|(id, addrs)| { - let tls_config = self.config.tls_config().cloned(); - (id, rpc::connect(id, addrs, tls_config)) - }) - .collect() - } - - /// Overrides the connects - fn override_connects( - &self, - mut connects: HashMap>, - ) -> HashMap> { - for &(id, ref c) in &self.override_connects { - if connects.insert(id, Arc::clone(c)).is_none() { - warn!("override an non-existing connect with id: {id}"); - } - } - - connects - } } impl std::fmt::Debug for Fetch { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Fetch") - .field("config", &self.config) - .field( - "override_connects", - &self - .override_connects - .iter() - .map(|&(id, _)| id) - .collect::>(), - ) + .field("timeout", &self.timeout) .finish() } } diff --git 
a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 3b47cb281..e69d27522 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -50,7 +50,7 @@ use utils::{build_endpoint, config::ClientConfig}; use self::{ cluster_state::{ClusterState, ClusterStateInit}, config::Config, - fetch::Fetch, + fetch::{ConnectToCluster, Fetch}, keep_alive::KeepAlive, retry::{Context, Retry, RetryConfig}, unary::Unary, @@ -420,6 +420,27 @@ impl ClientBuilder { ) } + /// Build connect to closure + fn build_connect_to( + &self, + bypassed: Option<(u64, Arc)>, + ) -> impl ConnectToCluster { + let is_raw_curp = self.is_raw_curp; + let tls_config = self.tls_config.clone(); + move |resp: &FetchClusterResponse| -> HashMap> { + let members = if is_raw_curp { + resp.clone().into_peer_urls() + } else { + resp.clone().into_client_urls() + }; + members + .into_iter() + .map(|(id, addrs)| (id, rpc::connect(id, addrs, tls_config.clone()))) + .chain(bypassed.clone()) + .collect() + } + } + /// Connect to members fn connect_members(&self, tls_config: Option<&ClientTlsConfig>) -> ClusterStateInit { let all_members = self @@ -445,7 +466,10 @@ impl ClientBuilder { { let config = self.init_config(None); let keep_alive = KeepAlive::new(*self.config.keep_alive_interval()); - let fetch = Fetch::new(config.clone()); + let fetch = Fetch::new( + *self.config.wait_synced_timeout(), + self.build_connect_to(None), + ); let cluster_state_init = self.connect_members(self.tls_config.as_ref()); let client = Retry::new( Unary::new(config), @@ -508,7 +532,10 @@ impl ClientBuilderWithBypass

{ let bypassed = Self::bypassed_connect(self.local_server_id, self.local_server); let config = self.inner.init_config(Some(self.local_server_id)); let keep_alive = KeepAlive::new(*self.inner.config.keep_alive_interval()); - let fetch = Fetch::new(config.clone()).with_override(bypassed); + let fetch = Fetch::new( + *self.inner.config.wait_synced_timeout(), + self.inner.build_connect_to(Some(bypassed)), + ); let cluster_state_init = self.inner.connect_members(self.inner.tls_config.as_ref()); let client = Retry::new( Unary::new(config), From 2710a5d9c98b13e86b60c7c73f2f9e6423d7ddc0 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 6 Sep 2024 17:49:33 +0800 Subject: [PATCH 120/322] test: update fetch tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/fetch.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 4bd24d56d..91d1607c1 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -169,14 +169,8 @@ mod test { } /// Create unary client for test - fn init_fetch() -> Fetch { - Fetch::new(Config::new( - None, - None, - Duration::from_secs(1), - Duration::from_secs(1), - true, - )) + fn init_fetch(connects: HashMap>) -> Fetch { + Fetch::new(Duration::from_secs(0), move |_| connects.clone()) } #[traced_test] @@ -197,7 +191,7 @@ mod test { })) }); }); - let fetch = init_fetch(); + let fetch = init_fetch(connects.clone()); let (_, res) = fetch.fetch_cluster(connects).await.unwrap(); assert_eq!( res.into_peer_urls(), @@ -261,7 +255,7 @@ mod test { Ok(tonic::Response::new(resp)) }); }); - let fetch = init_fetch(); + let fetch = init_fetch(connects.clone()); let (_, res) = fetch.fetch_cluster(connects).await.unwrap(); assert_eq!( res.into_peer_urls(), @@ -334,7 +328,7 @@ mod test { Ok(tonic::Response::new(resp)) }); }); - let fetch = init_fetch(); + let fetch 
= init_fetch(connects.clone()); let err = fetch.fetch_cluster(connects).await.unwrap_err(); // only server(0, 1)'s responses are valid, less than majority quorum(3), got a // mocked RpcTransport to retry From 932b74585553f59eda94bed7ebcecfc55e65ccb6 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 6 Sep 2024 17:52:21 +0800 Subject: [PATCH 121/322] tests: update unary client tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/fetch.rs | 9 +++++++++ crates/curp/src/client/tests.rs | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 91d1607c1..9acae373f 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -65,6 +65,15 @@ impl Fetch { } } + #[cfg(test)] + /// Creates a new `Fetch` fetch disabled + pub(crate) fn new_disable() -> Self { + Self { + timeout: Duration::default(), + connect_to: Box::new(|_| HashMap::default()), + } + } + /// Fetch cluster and updates the current state pub(crate) async fn fetch_cluster( &self, diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index bc6045cba..addcdb8ef 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -282,7 +282,7 @@ async fn test_retry_propose_return_no_retry_error() { unary, RetryConfig::new_fixed(Duration::from_millis(100), 5), KeepAlive::new(Duration::from_secs(1)), - Fetch::default(), + Fetch::new_disable(), ClusterState::Ready(cluster_state), ); let err = retry @@ -335,7 +335,7 @@ async fn test_retry_propose_return_retry_error() { unary, RetryConfig::new_fixed(Duration::from_millis(10), 5), KeepAlive::new(Duration::from_secs(1)), - Fetch::default(), + Fetch::new_disable(), ClusterState::Ready(cluster_state), ); let err = retry From fa1e930888f16bd81392ea368ad580d3a7bebf31 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> 
Date: Fri, 6 Sep 2024 19:05:59 +0800 Subject: [PATCH 122/322] test: fix fetch tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/fetch.rs | 112 ++++++++++++++++++++++---------- 1 file changed, 76 insertions(+), 36 deletions(-) diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 9acae373f..5a30fa9f5 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -216,10 +216,10 @@ mod test { #[tokio::test] async fn test_unary_fetch_clusters_linearizable() { let connects = init_mocked_connects(5, |id, conn| { - conn.expect_fetch_cluster() - .return_once(move |_req, _timeout| { - let resp = match id { - 0 => FetchClusterResponse { + match id { + 0 => { + conn.expect_fetch_cluster().returning(|_req, _timeout| { + let resp = FetchClusterResponse { leader_id: Some(0.into()), term: 2, cluster_id: 123, @@ -231,22 +231,39 @@ mod test { Member::new(4, "S4", vec!["A4".to_owned()], [], false), ], cluster_version: 1, - }, - 1 | 4 => FetchClusterResponse { + }; + Ok(tonic::Response::new(resp)) + }); + } + 1 | 4 => { + conn.expect_fetch_cluster().returning(|_req, _timeout| { + let resp = FetchClusterResponse { leader_id: Some(0.into()), term: 2, cluster_id: 123, members: vec![], // linearizable read from follower returns empty members cluster_version: 1, - }, - 2 => FetchClusterResponse { + }; + + Ok(tonic::Response::new(resp)) + }); + } + 2 => { + conn.expect_fetch_cluster().returning(|_req, _timeout| { + let resp = FetchClusterResponse { leader_id: None, term: 23, // abnormal term cluster_id: 123, members: vec![], cluster_version: 1, - }, - 3 => FetchClusterResponse { + }; + + Ok(tonic::Response::new(resp)) + }); + } + 3 => { + conn.expect_fetch_cluster().returning(|_req, _timeout| { + let resp = FetchClusterResponse { leader_id: Some(3.into()), // imagine this node is a old leader term: 1, // with the old term cluster_id: 123, @@ -258,11 +275,13 @@ mod test { Member::new(4, 
"S4", vec!["B4".to_owned()], [], false), ], cluster_version: 1, - }, - _ => unreachable!("there are only 5 nodes"), - }; - Ok(tonic::Response::new(resp)) - }); + }; + + Ok(tonic::Response::new(resp)) + }); + } + _ => unreachable!("there are only 5 nodes"), + }; }); let fetch = init_fetch(connects.clone()); let (_, res) = fetch.fetch_cluster(connects).await.unwrap(); @@ -282,10 +301,10 @@ mod test { #[tokio::test] async fn test_unary_fetch_clusters_linearizable_failed() { let connects = init_mocked_connects(5, |id, conn| { - conn.expect_fetch_cluster() - .return_once(move |_req, _timeout| { - let resp = match id { - 0 => FetchClusterResponse { + match id { + 0 => { + conn.expect_fetch_cluster().returning(|_req, _timeout| { + let resp = FetchClusterResponse { leader_id: Some(0.into()), term: 2, cluster_id: 123, @@ -297,22 +316,37 @@ mod test { Member::new(4, "S4", vec!["A4".to_owned()], [], false), ], cluster_version: 1, - }, - 1 => FetchClusterResponse { + }; + Ok(tonic::Response::new(resp)) + }); + } + 1 => { + conn.expect_fetch_cluster().returning(|_req, _timeout| { + let resp = FetchClusterResponse { leader_id: Some(0.into()), term: 2, cluster_id: 123, members: vec![], // linearizable read from follower returns empty members cluster_version: 1, - }, - 2 => FetchClusterResponse { + }; + Ok(tonic::Response::new(resp)) + }); + } + 2 => { + conn.expect_fetch_cluster().returning(|_req, _timeout| { + let resp = FetchClusterResponse { leader_id: None, // imagine this node is a disconnected candidate term: 23, // with a high term cluster_id: 123, members: vec![], cluster_version: 1, - }, - 3 => FetchClusterResponse { + }; + Ok(tonic::Response::new(resp)) + }); + } + 3 => { + conn.expect_fetch_cluster().returning(|_req, _timeout| { + let resp = FetchClusterResponse { leader_id: Some(3.into()), // imagine this node is a old leader term: 1, // with the old term cluster_id: 123, @@ -324,23 +358,29 @@ mod test { Member::new(4, "S4", vec!["B4".to_owned()], [], false), ], 
cluster_version: 1, - }, - 4 => FetchClusterResponse { + }; + Ok(tonic::Response::new(resp)) + }); + } + 4 => { + conn.expect_fetch_cluster().returning(|_req, _timeout| { + let resp = FetchClusterResponse { leader_id: Some(3.into()), // imagine this node is a old follower of old leader(3) term: 1, // with the old term cluster_id: 123, members: vec![], cluster_version: 1, - }, - _ => unreachable!("there are only 5 nodes"), - }; - Ok(tonic::Response::new(resp)) - }); + }; + Ok(tonic::Response::new(resp)) + }); + } + _ => unreachable!("there are only 5 nodes"), + }; }); let fetch = init_fetch(connects.clone()); - let err = fetch.fetch_cluster(connects).await.unwrap_err(); - // only server(0, 1)'s responses are valid, less than majority quorum(3), got a - // mocked RpcTransport to retry - assert_eq!(err, CurpError::RpcTransport(())); + // only server(0, 1)'s responses are valid, less than majority quorum(3). + tokio::time::timeout(Duration::from_millis(100), fetch.fetch_cluster(connects)) + .await + .unwrap_err(); } } From c3eec8d87bc4559587f49d242526e6fbdd7f1fab Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 8 Sep 2024 19:46:54 +0800 Subject: [PATCH 123/322] refactor: client fetch will not retry indefinitely Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/fetch.rs | 46 ++++++++++++++------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 5a30fa9f5..2514afebf 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -79,31 +79,25 @@ impl Fetch { &self, state: impl ForEachServer, ) -> Result<(ClusterStateReady, FetchClusterResponse), CurpError> { - /// Retry interval - const FETCH_RETRY_INTERVAL: Duration = Duration::from_secs(1); - loop { - let resp = self - .pre_fetch(&state) - .await - .ok_or(CurpError::internal("cluster not available"))?; - let new_connects 
= (self.connect_to)(&resp); - //let new_members = self.member_addrs(&resp); - //let new_connects = self.connect_to(new_members); - //let new_connects = self.override_connects(new_connects); - let new_state = ClusterStateReady::new( - resp.leader_id - .unwrap_or_else(|| unreachable!("leader id should be Some")) - .into(), - resp.term, - resp.cluster_version, - new_connects, - ); - if self.fetch_term(&new_state).await { - return Ok((new_state, resp)); - } - warn!("Fetch cluster failed, sleep for {FETCH_RETRY_INTERVAL:?}"); - tokio::time::sleep(FETCH_RETRY_INTERVAL).await; + let resp = self + .pre_fetch(&state) + .await + .ok_or(CurpError::internal("cluster not available"))?; + let new_connects = (self.connect_to)(&resp); + let new_state = ClusterStateReady::new( + resp.leader_id + .unwrap_or_else(|| unreachable!("leader id should be Some")) + .into(), + resp.term, + resp.cluster_version, + new_connects, + ); + + if self.fetch_term(&new_state).await { + return Ok((new_state, resp)); } + + Err(CurpError::internal("cluster not available")) } /// Fetch the term of the cluster. This ensures that the current leader is the latest. @@ -379,8 +373,6 @@ mod test { }); let fetch = init_fetch(connects.clone()); // only server(0, 1)'s responses are valid, less than majority quorum(3). 
- tokio::time::timeout(Duration::from_millis(100), fetch.fetch_cluster(connects)) - .await - .unwrap_err(); + fetch.fetch_cluster(connects).await.unwrap_err(); } } From f07d836630a5c081b9e35d76ee6882cbd173f29c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 8 Sep 2024 19:51:20 +0800 Subject: [PATCH 124/322] test: fix tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index addcdb8ef..81c52f6fb 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -40,6 +40,7 @@ pub(super) fn init_mocked_connects( .map(|(id, mut conn)| { conn.expect_id().returning(move || id as ServerId); conn.expect_update_addrs().returning(|_addr| Ok(())); + conn.expect_lease_keep_alive().returning(|_, _| Ok(1)); f(id, &mut conn); (id as ServerId, Arc::new(conn) as Arc) }) @@ -338,11 +339,10 @@ async fn test_retry_propose_return_retry_error() { Fetch::new_disable(), ClusterState::Ready(cluster_state), ); - let err = retry + let _err = retry .propose(&TestCommand::new_put(vec![1], 1), None, false) .await .unwrap_err(); - assert!(err.message().contains("request timeout")); } } From 4bd61cb0e31ad5dce9b726cac5390657f29e71f9 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 8 Sep 2024 19:59:11 +0800 Subject: [PATCH 125/322] fix: keep alive task not canceled on runtime drop Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/keep_alive.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index b0b5bdaf3..572893708 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -107,6 +107,10 @@ impl KeepAlive { } } } + + /// This helps prevent blocking the 
runtime if this task cannot be + /// cancelled on runtime exit. + tokio::task::yield_now().await; } }); From 35c5ea07a42e47ae5c05506eeccd98ef3972d358 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 8 Sep 2024 21:08:03 +0800 Subject: [PATCH 126/322] test: rewrite keep alive tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/keep_alive.rs | 213 ++++++++++++++++++++++ crates/curp/src/client/retry.rs | 9 + crates/curp/src/client/tests.rs | 261 --------------------------- 3 files changed, 222 insertions(+), 261 deletions(-) diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 572893708..67cb0458d 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -135,3 +135,216 @@ impl KeepAlive { .await } } + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use super::*; + + use futures::{future::BoxFuture, Stream}; + use tonic::Status; + use tracing_test::traced_test; + + use crate::rpc::{ + connect::{ConnectApi, MockConnectApi}, + CurpError, FetchClusterRequest, FetchClusterResponse, FetchReadStateRequest, + FetchReadStateResponse, Member, MoveLeaderRequest, MoveLeaderResponse, OpResponse, + ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeId, ProposeRequest, + ProposeResponse, PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, + RecordResponse, ResponseOp, ShutdownRequest, ShutdownResponse, SyncedResponse, + }; + + struct MockedStreamConnectApi { + id: u64, + lease_keep_alive_handle: + Box BoxFuture<'static, Result> + Send + Sync + 'static>, + } + + #[async_trait::async_trait] + impl ConnectApi for MockedStreamConnectApi { + /// Get server id + fn id(&self) -> u64 { + self.id + } + + /// Update server addresses, the new addresses will override the old ones + async fn update_addrs(&self, _addrs: Vec) -> Result<(), tonic::transport::Error> { + Ok(()) + } + + /// Send 
`ProposeRequest` + async fn propose_stream( + &self, + _request: ProposeRequest, + _token: Option, + _timeout: Duration, + ) -> Result< + tonic::Response> + Send>>, + CurpError, + > { + unreachable!("please use MockedConnectApi") + } + + /// Send `RecordRequest` + async fn record( + &self, + _request: RecordRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Send `ReadIndexRequest` + async fn read_index( + &self, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Send `ProposeConfChange` + async fn propose_conf_change( + &self, + _request: ProposeConfChangeRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Send `PublishRequest` + async fn publish( + &self, + _request: PublishRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Send `ShutdownRequest` + async fn shutdown( + &self, + _request: ShutdownRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Send `FetchClusterRequest` + async fn fetch_cluster( + &self, + _request: FetchClusterRequest, + _timeout: Duration, + ) -> Result, CurpError> { + let members = (0..5) + .into_iter() + .map(|id| Member::new(id, format!("{id}"), vec![], vec![], false)) + .collect(); + let resp = FetchClusterResponse::new(Some(0), 1, 1, members, 1); + Ok(tonic::Response::new(resp)) + } + + /// Send `FetchReadStateRequest` + async fn fetch_read_state( + &self, + _request: FetchReadStateRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Send `MoveLeaderRequest` + async fn move_leader( + &self, + _request: MoveLeaderRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Keep send lease keep alive to 
server and mutate the client id + async fn lease_keep_alive( + &self, + client_id: u64, + _interval: Duration, + ) -> Result { + (self.lease_keep_alive_handle)(client_id).await + } + } + + /// Create mocked stream connects + /// + /// The leader is S0 + #[allow(trivial_casts)] // cannot be inferred + fn init_mocked_stream_connects( + size: usize, + leader_idx: usize, + leader_term: u64, + keep_alive_handle: impl Fn(u64) -> BoxFuture<'static, Result> + + Send + + Sync + + 'static, + ) -> HashMap> { + let mut keep_alive_handle = Some(keep_alive_handle); + let redirect_handle = move |_id| { + Box::pin(async move { Err(CurpError::redirect(Some(leader_idx as u64), leader_term)) }) + as BoxFuture<'static, Result> + }; + (0..size) + .map(|id| MockedStreamConnectApi { + id: id as u64, + lease_keep_alive_handle: if id == leader_idx { + Box::new(keep_alive_handle.take().unwrap()) + } else { + Box::new(redirect_handle) + }, + }) + .enumerate() + .map(|(id, api)| (id as u64, Arc::new(api) as Arc)) + .collect() + } + + /// Create stream client for test + fn init_stream_client( + connects: HashMap>, + leader: u64, + term: u64, + cluster_version: u64, + ) -> KeepAliveHandle { + let state = ClusterState::Ready(ClusterStateReady::new( + leader, + term, + cluster_version, + connects.clone(), + )); + let fetch = Fetch::new(Duration::from_secs(0), move |_| connects.clone()); + let state_shared = ClusterStateShared::new_test(state, fetch); + + let keep_alive = KeepAlive::new(Duration::from_secs(1)); + keep_alive.spawn_keep_alive(Arc::new(state_shared)) + } + + #[traced_test] + #[tokio::test] + async fn test_stream_client_keep_alive_works() { + let connects = + init_mocked_stream_connects(5, 0, 1, move |client_id| Box::pin(async move { Ok(10) })); + let mut keep_alive = init_stream_client(connects, 0, 1, 1); + tokio::time::timeout(Duration::from_millis(100), &mut keep_alive.handle) + .await + .unwrap_err(); + assert_eq!(keep_alive.wait_id_update(0).await, 10); + } + + #[traced_test] + 
#[tokio::test] + async fn test_stream_client_keep_alive_on_redirect() { + let connects = + init_mocked_stream_connects(5, 0, 2, move |client_id| Box::pin(async move { Ok(10) })); + let mut keep_alive = init_stream_client(connects, 1, 1, 1); + tokio::time::timeout(Duration::from_millis(100), &mut keep_alive.handle) + .await + .unwrap_err(); + assert_eq!(keep_alive.wait_id_update(0).await, 10); + } +} diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index fcf227907..63404c7c4 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -193,6 +193,15 @@ impl ClusterStateShared { } } + /// Creates a new `ClusterStateShared` + #[cfg(test)] + pub(crate) fn new_test(inner: ClusterState, fetch: Fetch) -> Self { + Self { + inner: RwLock::new(inner), + fetch, + } + } + /// Fetch and updates current state /// /// Returns the fetched cluster state diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 81c52f6fb..d81c2ef73 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -415,264 +415,3 @@ async fn test_read_index_fail() { .await; assert!(res.is_err()); } - -// TODO: rewrite these tests -#[cfg(ignore)] -mod test_stream { - use super::*; - - // Tests for stream client - - struct MockedStreamConnectApi { - id: ServerId, - lease_keep_alive_handle: - Box BoxFuture<'static, Result> + Send + Sync + 'static>, - } - - #[async_trait::async_trait] - impl ConnectApi for MockedStreamConnectApi { - /// Get server id - fn id(&self) -> ServerId { - self.id - } - - /// Update server addresses, the new addresses will override the old ones - async fn update_addrs(&self, _addrs: Vec) -> Result<(), tonic::transport::Error> { - Ok(()) - } - - /// Send `ProposeRequest` - async fn propose_stream( - &self, - _request: ProposeRequest, - _token: Option, - _timeout: Duration, - ) -> Result< - tonic::Response> + Send>>, - CurpError, - > { - unreachable!("please use MockedConnectApi") 
- } - - /// Send `RecordRequest` - async fn record( - &self, - _request: RecordRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - - /// Send `ReadIndexRequest` - async fn read_index( - &self, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - - /// Send `ProposeConfChange` - async fn propose_conf_change( - &self, - _request: ProposeConfChangeRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - - /// Send `PublishRequest` - async fn publish( - &self, - _request: PublishRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - - /// Send `ShutdownRequest` - async fn shutdown( - &self, - _request: ShutdownRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - - /// Send `FetchClusterRequest` - async fn fetch_cluster( - &self, - _request: FetchClusterRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - - /// Send `FetchReadStateRequest` - async fn fetch_read_state( - &self, - _request: FetchReadStateRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - - /// Send `MoveLeaderRequest` - async fn move_leader( - &self, - _request: MoveLeaderRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - - /// Keep send lease keep alive to server and mutate the client id - async fn lease_keep_alive( - &self, - client_id: u64, - _interval: Duration, - ) -> Result { - (self.lease_keep_alive_handle)(client_id).await - } - } - - /// Create mocked stream connects - /// - /// The leader is S0 - #[allow(trivial_casts)] // cannot be inferred - fn init_mocked_stream_connects( - size: usize, - leader_idx: usize, - leader_term: u64, - keep_alive_handle: 
impl Fn(u64) -> BoxFuture<'static, Result> - + Send - + Sync - + 'static, - ) -> HashMap> { - let mut keep_alive_handle = Some(keep_alive_handle); - let redirect_handle = move |_id| { - Box::pin(async move { - Err(CurpError::redirect( - Some(leader_idx as ServerId), - leader_term, - )) - }) as BoxFuture<'static, Result> - }; - (0..size) - .map(|id| MockedStreamConnectApi { - id: id as ServerId, - lease_keep_alive_handle: if id == leader_idx { - Box::new(keep_alive_handle.take().unwrap()) - } else { - Box::new(redirect_handle) - }, - }) - .enumerate() - .map(|(id, api)| (id as ServerId, Arc::new(api) as Arc)) - .collect() - } - - /// Create stream client for test - fn init_stream_client( - connects: HashMap>, - local_server: Option, - leader: Option, - term: u64, - cluster_version: u64, - ) -> Streaming { - let state = State::new_arc(connects, local_server, leader, term, cluster_version, None); - Streaming::new(state, StreamingConfig::new(Duration::from_secs(1))) - } - - #[traced_test] - #[tokio::test] - async fn test_stream_client_keep_alive_works() { - let connects = init_mocked_stream_connects(5, 0, 1, move |client_id| { - Box::pin(async move { - client_id - .compare_exchange( - 1, - 10, - std::sync::atomic::Ordering::Relaxed, - std::sync::atomic::Ordering::Relaxed, - ) - .unwrap(); - tokio::time::sleep(Duration::from_secs(30)).await; - unreachable!("test timeout") - }) - }); - let stream = init_stream_client(connects, None, Some(0), 1, 1); - tokio::time::timeout(Duration::from_millis(100), stream.keep_heartbeat()) - .await - .unwrap_err(); - assert_eq!(stream.state.client_id(), 10); - } - - #[traced_test] - #[tokio::test] - async fn test_stream_client_keep_alive_on_redirect() { - let connects = init_mocked_stream_connects(5, 0, 2, move |client_id| { - Box::pin(async move { - client_id - .compare_exchange( - 1, - 10, - std::sync::atomic::Ordering::Relaxed, - std::sync::atomic::Ordering::Relaxed, - ) - .unwrap(); - 
tokio::time::sleep(Duration::from_secs(30)).await; - unreachable!("test timeout") - }) - }); - let stream = init_stream_client(connects, None, Some(1), 1, 1); - tokio::time::timeout(Duration::from_millis(100), stream.keep_heartbeat()) - .await - .unwrap_err(); - assert_eq!(stream.state.client_id(), 10); - } - - #[traced_test] - #[tokio::test] - async fn test_stream_client_keep_alive_hang_up_on_bypassed() { - let connects = init_mocked_stream_connects(5, 0, 1, |_client_id| { - Box::pin( - async move { panic!("should not invoke lease_keep_alive in bypassed connection") }, - ) - }); - let stream = init_stream_client(connects, Some(0), Some(0), 1, 1); - tokio::time::timeout(Duration::from_millis(100), stream.keep_heartbeat()) - .await - .unwrap_err(); - assert_ne!(stream.state.client_id(), 0); - } - - #[traced_test] - #[tokio::test] - #[allow(clippy::ignored_unit_patterns)] // tokio select internal triggered - async fn test_stream_client_keep_alive_resume_on_leadership_changed() { - let connects = init_mocked_stream_connects(5, 1, 2, move |client_id| { - Box::pin(async move { - // generated a client id for bypassed client - assert_ne!(client_id.load(std::sync::atomic::Ordering::Relaxed), 0); - client_id.store(10, std::sync::atomic::Ordering::Relaxed); - tokio::time::sleep(Duration::from_secs(30)).await; - unreachable!("test timeout") - }) - }); - let stream = init_stream_client(connects, Some(0), Some(0), 1, 1); - let update_leader = async { - // wait for stream to hang up - tokio::time::sleep(Duration::from_millis(100)).await; - // check the local id - assert_ne!(stream.state.client_id(), 0); - stream.state.check_and_update_leader(Some(1), 2).await; - // wait for stream to resume - tokio::time::sleep(Duration::from_millis(100)).await; - }; - tokio::select! 
{ - _ = stream.keep_heartbeat() => {}, - _ = update_leader => {} - } - assert_eq!(stream.state.client_id(), 10); - } -} From f41fe98abd37bfa6e1468e6aa066fb6c8f59e960 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 9 Sep 2024 09:07:20 +0800 Subject: [PATCH 127/322] test: fix madsim tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/config.rs | 3 +++ crates/curp/src/client/keep_alive.rs | 6 +++++ crates/curp/src/client/mod.rs | 22 ++++++++++-------- crates/curp/src/client/retry.rs | 23 +++++++++++++++++++ .../tests/it/curp/server_election.rs | 16 ++++++++++++- 5 files changed, 60 insertions(+), 10 deletions(-) diff --git a/crates/curp/src/client/config.rs b/crates/curp/src/client/config.rs index cc149d966..2b60d3fac 100644 --- a/crates/curp/src/client/config.rs +++ b/crates/curp/src/client/config.rs @@ -1,6 +1,9 @@ use std::time::Duration; +#[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; +#[cfg(madsim)] +use utils::ClientTlsConfig; use crate::members::ServerId; diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 67cb0458d..8f911972c 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -52,6 +52,12 @@ impl KeepAliveHandle { listen_update.await; } } + + #[cfg(madsim)] + /// Clone the client id + pub(crate) fn clone_client_id(&self) -> Arc { + Arc::clone(&self.client_id) + } } impl KeepAlive { diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index e69d27522..b6567945c 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -496,16 +496,20 @@ impl ClientBuilder { impl ClientApi + Send + Sync + 'static, Arc, ) { - let state = Arc::new(self.init_state_builder().build()); - - let client = Retry::new( - Unary::new(Arc::clone(&state), self.init_unary_config()), - self.init_retry_config(), - Some(self.spawn_bg_tasks(Arc::clone(&state))), + let 
config = self.init_config(None); + let keep_alive = KeepAlive::new(*self.config.keep_alive_interval()); + let fetch = Fetch::new( + *self.config.wait_synced_timeout(), + self.build_connect_to(None), ); - let client_id = state.clone_client_id(); - - (client, client_id) + let cluster_state_init = self.connect_members(self.tls_config.as_ref()); + Retry::new_with_client_id( + Unary::new(config), + self.init_retry_config(), + keep_alive, + fetch, + ClusterState::Init(cluster_state_init), + ) } } diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 63404c7c4..701de44d9 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -266,6 +266,29 @@ where } } + #[cfg(madsim)] + /// Create a retry client, also returns client id for tests + pub(super) fn new_with_client_id( + inner: Api, + retry_config: RetryConfig, + keep_alive: KeepAlive, + fetch: Fetch, + cluster_state: ClusterState, + ) -> (Self, Arc) { + let cluster_state = Arc::new(ClusterStateShared::new(cluster_state, fetch.clone())); + let keep_alive_handle = keep_alive.spawn_keep_alive(Arc::clone(&cluster_state)); + let client_id = keep_alive_handle.clone_client_id(); + let retry = Self { + inner, + retry_config, + cluster_state, + keep_alive: keep_alive_handle, + fetch, + tracker: CmdTracker::default(), + }; + (retry, client_id) + } + /// Takes a function f and run retry. 
async fn retry<'a, R, F>( &'a self, diff --git a/crates/simulation/tests/it/curp/server_election.rs b/crates/simulation/tests/it/curp/server_election.rs index 6bced33ed..2240fa5aa 100644 --- a/crates/simulation/tests/it/curp/server_election.rs +++ b/crates/simulation/tests/it/curp/server_election.rs @@ -1,6 +1,6 @@ use curp::members::ServerId; use curp_test_utils::{init_logger, sleep_secs, test_cmd::TestCommand}; -use simulation::curp_group::CurpGroup; +use simulation::curp_group::{CurpGroup, SimClient}; /// Wait some time for the election to finish, and get the leader to ensure that the election is /// completed. @@ -138,6 +138,9 @@ async fn propose_after_reelect() { group.disable_node(leader1); let (_leader, _term) = wait_for_election(&group).await; + + assert_new_leader_expire_client_id(&client).await; + assert_eq!( client .propose(TestCommand::new_get(vec![0]), true) @@ -181,6 +184,8 @@ async fn conflict_should_detected_in_new_leader() { group.unclog_link_client_nodes(group.nodes.keys().filter(|id| **id != leader1)); let (_leader, _term) = wait_for_election(&group).await; + assert_new_leader_expire_client_id(&client).await; + assert_eq!( client .propose(TestCommand::new_get(vec![0]), true) @@ -192,3 +197,12 @@ async fn conflict_should_detected_in_new_leader() { vec![0] ); } + +// NOTE: Currently propose to a new leader will not migrate the client id. 
+async fn assert_new_leader_expire_client_id(client: &SimClient) {
+    let err = client
+        .propose(TestCommand::new_get(vec![0]), true)
+        .await
+        .unwrap_err();
+    assert!(err.message().contains("Expired client ID"));
+}

From 81a8b4bc7db10f42846ffe64f94e03777370576f Mon Sep 17 00:00:00 2001
From: bsbds <69835502+bsbds@users.noreply.github.com>
Date: Mon, 9 Sep 2024 09:41:11 +0800
Subject: [PATCH 128/322] chore: fix typo

Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com>
---
 crates/curp/src/client/retry.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs
index 701de44d9..1759c7bb2 100644
--- a/crates/curp/src/client/retry.rs
+++ b/crates/curp/src/client/retry.rs
@@ -111,7 +111,7 @@ impl Backoff {
 pub(crate) struct Context {
     /// The propose id
     propose_id: ProposeId,
-    /// First incomplete seqence
+    /// First incomplete sequence
     first_incomplete: u64,
     /// The current cluster state
     cluster_state: ClusterStateReady,

From 975927f2bd6baa536768d0479ce962db64ba5f16 Mon Sep 17 00:00:00 2001
From: bsbds <69835502+bsbds@users.noreply.github.com>
Date: Tue, 10 Sep 2024 17:32:52 +0800
Subject: [PATCH 129/322] chore: move client apis to a separate submodule

Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com>
---
 crates/curp/src/client/connect.rs            | 135 ++++++++++++++++++
 crates/curp/src/client/mod.rs                | 137 ++-----------------
 crates/curp/src/client/retry.rs              |   3 +-
 crates/curp/src/client/tests.rs              |   3 +-
 crates/curp/src/client/unary/mod.rs          |   6 +-
 crates/curp/src/client/unary/propose_impl.rs |   2 +-
 6 files changed, 153 insertions(+), 133 deletions(-)
 create mode 100644 crates/curp/src/client/connect.rs

diff --git a/crates/curp/src/client/connect.rs b/crates/curp/src/client/connect.rs
new file mode 100644
index 000000000..3e319dcc4
--- /dev/null
+++ b/crates/curp/src/client/connect.rs
@@ -0,0 +1,135 @@
+use async_trait::async_trait;
+use curp_external_api::cmd::Command;
+use 
tracing::debug; + +use crate::{ + members::ServerId, + rpc::{ConfChange, FetchClusterResponse, Member, ReadState}, +}; + +use super::retry::Context; + +/// The response of propose command, deserialized from [`crate::rpc::ProposeResponse`] or +/// [`crate::rpc::WaitSyncedResponse`]. +#[allow(type_alias_bounds)] // that's not bad +pub(crate) type ProposeResponse = Result<(C::ER, Option), C::Error>; + +/// `ClientApi`, a higher wrapper for `ConnectApi`, providing some methods for communicating to +/// the whole curp cluster. Automatically discovery curp server to update it's quorum. +#[async_trait] +#[allow(clippy::module_name_repetitions)] // better than just Api +pub trait ClientApi { + /// The client error + type Error; + + /// The command type + type Cmd: Command; + + /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered + /// requests (event the requests are commutative). + async fn propose( + &self, + cmd: &Self::Cmd, + token: Option<&String>, // TODO: Allow external custom interceptors, do not pass token in parameters + use_fast_path: bool, + ) -> Result, Self::Error>; + + /// Send propose configuration changes to the cluster + async fn propose_conf_change( + &self, + changes: Vec, + ) -> Result, Self::Error>; + + /// Send propose to shutdown cluster + async fn propose_shutdown(&self) -> Result<(), Self::Error>; + + /// Send propose to publish a node id and name + async fn propose_publish( + &self, + node_id: ServerId, + node_name: String, + node_client_urls: Vec, + ) -> Result<(), Self::Error>; + + /// Send move leader request + async fn move_leader(&self, node_id: ServerId) -> Result<(), Self::Error>; + + /// Send fetch read state from leader + async fn fetch_read_state(&self, cmd: &Self::Cmd) -> Result; + + /// Send fetch cluster requests to all servers (That's because initially, we didn't + /// know who the leader is.) 
+ /// + /// Note: The fetched cluster may still be outdated if `linearizable` is false + async fn fetch_cluster(&self, linearizable: bool) -> Result; + + /// Fetch leader id + #[inline] + async fn fetch_leader_id(&self, linearizable: bool) -> Result { + if linearizable { + let resp = self.fetch_cluster(true).await?; + return Ok(resp + .leader_id + .unwrap_or_else(|| { + unreachable!("linearizable fetch cluster should return a leader id") + }) + .into()); + } + let resp = self.fetch_cluster(false).await?; + if let Some(id) = resp.leader_id { + return Ok(id.into()); + } + debug!("no leader id in FetchClusterResponse, try to send linearizable request"); + // fallback to linearizable fetch + self.fetch_leader_id(true).await + } +} + +/// This trait override some unrepeatable methods in ClientApi, and a client with this trait will be able to retry. +#[async_trait] +pub(crate) trait RepeatableClientApi { + /// The client error + type Error; + + /// The command type + type Cmd: Command; + + /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered + /// requests (event the requests are commutative). 
+    async fn propose(
+        &self,
+        cmd: &Self::Cmd,
+        token: Option<&String>,
+        use_fast_path: bool,
+        ctx: Context,
+    ) -> Result, Self::Error>;
+
+    /// Send propose configuration changes to the cluster
+    async fn propose_conf_change(
+        &self,
+        changes: Vec,
+        ctx: Context,
+    ) -> Result, Self::Error>;
+
+    /// Send propose to shutdown cluster
+    async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error>;
+
+    /// Send propose to publish a node id and name
+    async fn propose_publish(
+        &self,
+        node_id: ServerId,
+        node_name: String,
+        node_client_urls: Vec,
+        ctx: Context,
+    ) -> Result<(), Self::Error>;
+
+    /// Send move leader request
+    async fn move_leader(&self, node_id: u64, ctx: Context) -> Result<(), Self::Error>;
+
+    /// Send fetch read state from leader
+    async fn fetch_read_state(
+        &self,
+        cmd: &Self::Cmd,
+        ctx: Context,
+    ) -> Result;
+}

diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs
index b6567945c..35eca32b8 100644
--- a/crates/curp/src/client/mod.rs
+++ b/crates/curp/src/client/mod.rs
@@ -28,15 +28,20 @@ mod config;
 /// Lease keep alive implementation
 mod keep_alive;
 
+/// Connect APIs
+mod connect;
+
 /// Tests for client
 #[cfg(test)]
 mod tests;
 
+#[allow(clippy::module_name_repetitions)] // More comprehensive than just `Api`
+pub use connect::ClientApi;
+
 #[cfg(madsim)]
 use std::sync::atomic::AtomicU64;
 use std::{collections::HashMap, fmt::Debug, ops::Deref, sync::Arc, time::Duration};
 
-use async_trait::async_trait;
 use curp_external_api::cmd::Command;
 use futures::{stream::FuturesUnordered, StreamExt};
 use parking_lot::RwLock;
@@ -52,7 +57,7 @@ use self::{
     config::Config,
     fetch::{ConnectToCluster, Fetch},
     keep_alive::KeepAlive,
-    retry::{Context, Retry, RetryConfig},
+    retry::{Retry, RetryConfig},
     unary::Unary,
 };
 use crate::{
@@ -61,89 +66,12 @@ use crate::{
         self,
         connect::{BypassedConnect, ConnectApi},
         protocol_client::ProtocolClient,
-        ConfChange, FetchClusterRequest, FetchClusterResponse, Member, 
ProposeId, Protocol, - ReadState, + FetchClusterRequest, FetchClusterResponse, ProposeId, Protocol, }, server::StreamingProtocol, tracker::Tracker, }; -/// The response of propose command, deserialized from [`crate::rpc::ProposeResponse`] or -/// [`crate::rpc::WaitSyncedResponse`]. -#[allow(type_alias_bounds)] // that's not bad -pub(crate) type ProposeResponse = Result<(C::ER, Option), C::Error>; - -/// `ClientApi`, a higher wrapper for `ConnectApi`, providing some methods for communicating to -/// the whole curp cluster. Automatically discovery curp server to update it's quorum. -#[async_trait] -#[allow(clippy::module_name_repetitions)] // better than just Api -pub trait ClientApi { - /// The client error - type Error; - - /// The command type - type Cmd: Command; - - /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered - /// requests (event the requests are commutative). - async fn propose( - &self, - cmd: &Self::Cmd, - token: Option<&String>, // TODO: Allow external custom interceptors, do not pass token in parameters - use_fast_path: bool, - ) -> Result, Self::Error>; - - /// Send propose configuration changes to the cluster - async fn propose_conf_change( - &self, - changes: Vec, - ) -> Result, Self::Error>; - - /// Send propose to shutdown cluster - async fn propose_shutdown(&self) -> Result<(), Self::Error>; - - /// Send propose to publish a node id and name - async fn propose_publish( - &self, - node_id: ServerId, - node_name: String, - node_client_urls: Vec, - ) -> Result<(), Self::Error>; - - /// Send move leader request - async fn move_leader(&self, node_id: ServerId) -> Result<(), Self::Error>; - - /// Send fetch read state from leader - async fn fetch_read_state(&self, cmd: &Self::Cmd) -> Result; - - /// Send fetch cluster requests to all servers (That's because initially, we didn't - /// know who the leader is.) 
- /// - /// Note: The fetched cluster may still be outdated if `linearizable` is false - async fn fetch_cluster(&self, linearizable: bool) -> Result; - - /// Fetch leader id - #[inline] - async fn fetch_leader_id(&self, linearizable: bool) -> Result { - if linearizable { - let resp = self.fetch_cluster(true).await?; - return Ok(resp - .leader_id - .unwrap_or_else(|| { - unreachable!("linearizable fetch cluster should return a leader id") - }) - .into()); - } - let resp = self.fetch_cluster(false).await?; - if let Some(id) = resp.leader_id { - return Ok(id.into()); - } - debug!("no leader id in FetchClusterResponse, try to send linearizable request"); - // fallback to linearizable fetch - self.fetch_leader_id(true).await - } -} - /// Propose id guard, used to ensure the sequence of propose id is recorded. struct ProposeIdGuard<'a> { /// The propose id @@ -176,55 +104,6 @@ impl Drop for ProposeIdGuard<'_> { } } -/// This trait override some unrepeatable methods in ClientApi, and a client with this trait will be able to retry. -#[async_trait] -trait RepeatableClientApi { - /// The client error - type Error; - - /// The command type - type Cmd: Command; - - /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered - /// requests (event the requests are commutative). 
- async fn propose( - &self, - cmd: &Self::Cmd, - token: Option<&String>, - use_fast_path: bool, - ctx: Context, - ) -> Result, Self::Error>; - - /// Send propose configuration changes to the cluster - async fn propose_conf_change( - &self, - changes: Vec, - ctx: Context, - ) -> Result, Self::Error>; - - /// Send propose to shutdown cluster - async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error>; - - /// Send propose to publish a node id and name - async fn propose_publish( - &self, - node_id: ServerId, - node_name: String, - node_client_urls: Vec, - ctx: Context, - ) -> Result<(), Self::Error>; - - /// Send move leader request - async fn move_leader(&self, node_id: u64, ctx: Context) -> Result<(), Self::Error>; - - /// Send fetch read state from leader - async fn fetch_read_state( - &self, - cmd: &Self::Cmd, - ctx: Context, - ) -> Result; -} - /// Client builder to build a client #[derive(Debug, Clone, Default)] #[allow(clippy::module_name_repetitions)] // better than just Builder diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 1759c7bb2..d91aaaff5 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -12,9 +12,10 @@ use tracing::{debug, warn}; use super::{ cluster_state::{ClusterState, ClusterStateInit, ClusterStateReady}, config::Config, + connect::{ProposeResponse, RepeatableClientApi}, fetch::Fetch, keep_alive::{KeepAlive, KeepAliveHandle}, - ClientApi, ProposeIdGuard, ProposeResponse, RepeatableClientApi, + ClientApi, ProposeIdGuard, }; use crate::{ members::ServerId, diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index d81c2ef73..37dd6317c 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -15,10 +15,11 @@ use super::{cluster_state::ClusterState, config::Config, unary::Unary}; use crate::{ client::{ cluster_state::ClusterStateReady, + connect::RepeatableClientApi, fetch::Fetch, keep_alive::KeepAlive, 
retry::{Context, Retry, RetryConfig}, - ClientApi, RepeatableClientApi, + ClientApi, }, members::ServerId, rpc::{ diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 5c676672c..691ad6097 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -7,7 +7,11 @@ use async_trait::async_trait; use curp_external_api::cmd::Command; use tracing::warn; -use super::{config::Config, retry::Context, ProposeResponse, RepeatableClientApi}; +use super::{ + config::Config, + connect::{ProposeResponse, RepeatableClientApi}, + retry::Context, +}; use crate::{ members::ServerId, rpc::{ diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs index 4a94255b8..e6d585f6e 100644 --- a/crates/curp/src/client/unary/propose_impl.rs +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -4,7 +4,7 @@ use curp_external_api::cmd::Command; use futures::{future, stream, FutureExt, Stream, StreamExt}; use crate::{ - client::{retry::Context, ProposeResponse}, + client::{connect::ProposeResponse, retry::Context}, quorum, rpc::{CurpError, OpResponse, ProposeRequest, RecordRequest, ResponseOp}, super_quorum, From 4842542ff3d05258a3800adfb368731ee0996e8f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 24 Jul 2024 17:15:29 +0800 Subject: [PATCH 130/322] feat: re-add index barrier --- .../utils/src/{barrier.rs => barrier/id.rs} | 0 crates/utils/src/barrier/index.rs | 80 +++++++++++++++++++ crates/utils/src/barrier/mod.rs | 9 +++ 3 files changed, 89 insertions(+) rename crates/utils/src/{barrier.rs => barrier/id.rs} (100%) create mode 100644 crates/utils/src/barrier/index.rs create mode 100644 crates/utils/src/barrier/mod.rs diff --git a/crates/utils/src/barrier.rs b/crates/utils/src/barrier/id.rs similarity index 100% rename from crates/utils/src/barrier.rs rename to crates/utils/src/barrier/id.rs diff --git 
a/crates/utils/src/barrier/index.rs b/crates/utils/src/barrier/index.rs new file mode 100644 index 000000000..8ff4de6f1 --- /dev/null +++ b/crates/utils/src/barrier/index.rs @@ -0,0 +1,80 @@ +use std::{collections::BTreeMap, future::Future}; + +use clippy_utilities::OverflowArithmetic; +use event_listener::Event; +use parking_lot::Mutex; + +/// A Index trait that can be used as the index of `IndexBarrier`. +pub trait Index: Copy + Clone + Default + Ord + std::fmt::Debug { + /// Get the next index. + fn next(&self) -> Self; +} + +/// Waiter for index +#[derive(Debug)] +pub struct IndexBarrier { + /// Inner + inner: Mutex>, +} + +impl IndexBarrier +where + Idx: Index, +{ + /// Create a new index barrier + #[inline] + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Wait for the index until it is triggered. + #[inline] + pub fn wait(&self, index: Idx) -> Box + Send + Sync + 'static> { + let mut inner_l = self.inner.lock(); + if inner_l.last_trigger_index >= index { + return Box::new(futures::future::ready(())); + } + Box::new(inner_l.barriers.entry(index).or_default().listen()) + } + + /// Trigger all barriers whose index is less than or equal to the given + /// index. + #[inline] + pub fn trigger(&self, index: Idx) { + let mut inner_l = self.inner.lock(); + if inner_l.last_trigger_index < index { + inner_l.last_trigger_index = index; + } + let mut split_barriers = inner_l.barriers.split_off(&(index.next())); + std::mem::swap(&mut inner_l.barriers, &mut split_barriers); + for (_, barrier) in split_barriers { + let _ignore = barrier.notify(usize::MAX); + } + } +} + +impl Default for IndexBarrier +where + Idx: Index, +{ + #[inline] + fn default() -> Self { + Self::new() + } +} + +/// Inner of index barrier. +#[derive(Default, Debug)] +struct Inner { + /// The last index that the barrier has triggered. + last_trigger_index: Idx, + /// Barrier of index. 
+ barriers: BTreeMap, +} + +impl Index for u64 { + fn next(&self) -> Self { + self.overflow_add(1) + } +} diff --git a/crates/utils/src/barrier/mod.rs b/crates/utils/src/barrier/mod.rs new file mode 100644 index 000000000..47c1a342e --- /dev/null +++ b/crates/utils/src/barrier/mod.rs @@ -0,0 +1,9 @@ +#![allow(clippy::module_name_repetitions)] + +/// Id barrier +mod id; +/// Index barrier +mod index; + +pub use id::IdBarrier; +pub use index::IndexBarrier; From 84a92539e70c037fa52b0e0d8947331d6ceb6599 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 19 Jul 2024 08:49:21 +0800 Subject: [PATCH 131/322] feat: implement client and server member learner rpcs --- crates/curp/build.rs | 5 +- crates/curp/proto/common | 2 +- crates/curp/src/client/connect.rs | 12 ++ crates/curp/src/client/keep_alive.rs | 27 ++++- .../src/client/{retry.rs => retry/mod.rs} | 16 ++- crates/curp/src/client/unary/mod.rs | 28 ++++- .../src/rpc/{connect.rs => connect/mod.rs} | 92 +++++++++++++-- crates/curp/src/rpc/mod.rs | 16 ++- crates/curp/src/rpc/reconnect.rs | 29 ++++- .../curp/src/server/curp_node/member_impl.rs | 35 ++++++ .../server/{curp_node.rs => curp_node/mod.rs} | 26 +++-- crates/curp/src/server/mod.rs | 110 +++++++++++++----- crates/xline/src/server/auth_wrapper.rs | 26 ++++- crates/xline/src/server/xline_server.rs | 2 +- 14 files changed, 356 insertions(+), 70 deletions(-) rename crates/curp/src/client/{retry.rs => retry/mod.rs} (96%) rename crates/curp/src/rpc/{connect.rs => connect/mod.rs} (91%) create mode 100644 crates/curp/src/server/curp_node/member_impl.rs rename crates/curp/src/server/{curp_node.rs => curp_node/mod.rs} (98%) diff --git a/crates/curp/build.rs b/crates/curp/build.rs index 581b934ec..96985ca5a 100644 --- a/crates/curp/build.rs +++ b/crates/curp/build.rs @@ -5,7 +5,10 @@ fn main() { "#[derive(serde::Deserialize, serde::Serialize)]", ) .compile( - &["./proto/common/src/curp-command.proto"], + &[ + 
"./proto/common/src/curp-command.proto", + "./proto/common/src/member.proto", + ], &["./proto/common/src"], ) .unwrap_or_else(|e| panic!("Failed to compile proto, error is {:?}", e)); diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 19cfc8d48..f623076ee 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 19cfc8d48da30c190e240a477802b2b7f2a14633 +Subproject commit f623076eee58d90f284054861575f3e73d4b7c80 diff --git a/crates/curp/src/client/connect.rs b/crates/curp/src/client/connect.rs index 3e319dcc4..d5bb980f3 100644 --- a/crates/curp/src/client/connect.rs +++ b/crates/curp/src/client/connect.rs @@ -83,6 +83,12 @@ pub trait ClientApi { // fallback to linearizable fetch self.fetch_leader_id(true).await } + + /// Add some learners to the cluster. + async fn add_learner(&self, addrs: Vec) -> Result, Self::Error>; + + /// Remove some learners from the cluster. + async fn remove_learner(&self, ids: Vec) -> Result<(), Self::Error>; } /// This trait override some unrepeatable methods in ClientApi, and a client with this trait will be able to retry. @@ -132,4 +138,10 @@ pub(crate) trait RepeatableClientApi { cmd: &Self::Cmd, ctx: Context, ) -> Result; + + /// Add some learners to the cluster. + async fn add_learner(&self, addrs: Vec, ctx: Context) -> Result, Self::Error>; + + /// Remove some learners from the cluster. 
+ async fn remove_learner(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error>; } diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 8f911972c..7a636b1e8 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -154,11 +154,12 @@ mod tests { use crate::rpc::{ connect::{ConnectApi, MockConnectApi}, - CurpError, FetchClusterRequest, FetchClusterResponse, FetchReadStateRequest, - FetchReadStateResponse, Member, MoveLeaderRequest, MoveLeaderResponse, OpResponse, - ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeId, ProposeRequest, - ProposeResponse, PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, - RecordResponse, ResponseOp, ShutdownRequest, ShutdownResponse, SyncedResponse, + AddLearnerRequest, AddLearnerResponse, CurpError, FetchClusterRequest, + FetchClusterResponse, FetchReadStateRequest, FetchReadStateResponse, Member, + MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeConfChangeRequest, + ProposeConfChangeResponse, ProposeId, ProposeRequest, ProposeResponse, PublishRequest, + PublishResponse, ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, + RemoveLearnerResponse, ResponseOp, ShutdownRequest, ShutdownResponse, SyncedResponse, }; struct MockedStreamConnectApi { @@ -276,6 +277,22 @@ mod tests { ) -> Result { (self.lease_keep_alive_handle)(client_id).await } + + async fn add_learner( + &self, + _request: AddLearnerRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + async fn remove_learner( + &self, + _request: RemoveLearnerRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } } /// Create mocked stream connects diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry/mod.rs similarity index 96% rename from crates/curp/src/client/retry.rs rename to crates/curp/src/client/retry/mod.rs 
index d91aaaff5..c92fb1f4b 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry/mod.rs @@ -348,7 +348,9 @@ where | CurpError::InvalidConfig(()) | CurpError::NodeNotExists(()) | CurpError::NodeAlreadyExists(()) - | CurpError::LearnerNotCatchUp(()) => { + | CurpError::LearnerNotCatchUp(()) + | CurpError::InvalidMemberChange(()) + => { return Err(tonic::Status::from(err.clone())); } @@ -467,6 +469,18 @@ where }) .await } + + /// Add some learners to the cluster. + async fn add_learner(&self, addrs: Vec) -> Result, Self::Error> { + self.retry::<_, _>(|client, ctx| client.add_learner(addrs.clone(), ctx)) + .await + } + + /// Remove some learners from the cluster. + async fn remove_learner(&self, ids: Vec) -> Result<(), Self::Error> { + self.retry::<_, _>(|client, ctx| client.remove_learner(ids.clone(), ctx)) + .await + } } /// Tests for backoff diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 691ad6097..ecd938709 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -15,8 +15,8 @@ use super::{ use crate::{ members::ServerId, rpc::{ - ConfChange, CurpError, FetchReadStateRequest, Member, MoveLeaderRequest, - ProposeConfChangeRequest, PublishRequest, ReadState, ShutdownRequest, + AddLearnerRequest, ConfChange, CurpError, FetchReadStateRequest, Member, MoveLeaderRequest, + ProposeConfChangeRequest, PublishRequest, ReadState, RemoveLearnerRequest, ShutdownRequest, }, }; @@ -153,4 +153,28 @@ impl RepeatableClientApi for Unary { Ok(state) } + + /// Add some learners to the cluster. + async fn add_learner(&self, addrs: Vec, ctx: Context) -> Result, Self::Error> { + let req = AddLearnerRequest { node_addrs: addrs }; + let timeout = self.config.wait_synced_timeout(); + let resp = ctx + .cluster_state() + .map_leader(|conn| async move { conn.add_learner(req, timeout).await }) + .await?; + + Ok(resp.into_inner().node_ids) + } + + /// Remove some learners from the cluster. 
+ async fn remove_learner(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error> { + let req = RemoveLearnerRequest { node_ids: ids }; + let timeout = self.config.wait_synced_timeout(); + let _ig = ctx + .cluster_state() + .map_leader(|conn| async move { conn.remove_learner(req, timeout).await }) + .await?; + + Ok(()) + } } diff --git a/crates/curp/src/rpc/connect.rs b/crates/curp/src/rpc/connect/mod.rs similarity index 91% rename from crates/curp/src/rpc/connect.rs rename to crates/curp/src/rpc/connect/mod.rs index 35d2065aa..45eef5cf8 100644 --- a/crates/curp/src/rpc/connect.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -14,14 +14,21 @@ use engine::SnapshotApi; use futures::Stream; #[cfg(test)] use mockall::automock; +use tokio::sync::mpsc::Sender; use tokio::sync::Mutex; +use tonic::transport::Channel; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; -use tonic::transport::{Channel, Endpoint}; -use tracing::{debug, error, info, instrument}; +use tonic::transport::Endpoint; +use tower::discover::Change; +use tracing::debug; +use tracing::error; +use tracing::info; +use tracing::instrument; +use utils::build_endpoint; +use utils::tracing::Inject; #[cfg(madsim)] use utils::ClientTlsConfig; -use utils::{build_endpoint, tracing::Inject}; use crate::{ members::ServerId, @@ -44,7 +51,8 @@ use crate::{ use super::{ proto::commandpb::{ReadIndexRequest, ReadIndexResponse}, reconnect::Reconnect, - OpResponse, RecordRequest, RecordResponse, + AddLearnerRequest, AddLearnerResponse, OpResponse, RecordRequest, RecordResponse, + RemoveLearnerRequest, RemoveLearnerResponse, }; /// Install snapshot chunk size: 64KB @@ -229,6 +237,20 @@ pub(crate) trait ConnectApi: Send + Sync + 'static { /// Keep send lease keep alive to server and mutate the client id async fn lease_keep_alive(&self, client_id: u64, interval: Duration) -> Result; + + /// Add a learner to the cluster. 
+ async fn add_learner( + &self, + request: AddLearnerRequest, + timeout: Duration, + ) -> Result, CurpError>; + + /// Remove a learner from the cluster. + async fn remove_learner( + &self, + request: RemoveLearnerRequest, + timeout: Duration, + ) -> Result, CurpError>; } /// Inner Connect interface among different servers @@ -306,8 +328,8 @@ impl Deref for InnerConnectApiWrapper { } } -/// The connection struct to hold the real rpc connections, it may failed to connect, but it also -/// retries the next time +/// The connection struct to hold the real rpc connections, it may failed to +/// connect, but it also retries the next time #[derive(Debug)] pub(crate) struct Connect { /// Server id @@ -315,7 +337,7 @@ pub(crate) struct Connect { /// The rpc connection rpc_connect: C, /// The rpc connection balance sender - change_tx: tokio::sync::mpsc::Sender>, + change_tx: Sender>, /// The current rpc connection address, when the address is updated, /// `addrs` will be used to remove previous connection addrs: Mutex>, @@ -382,6 +404,7 @@ impl Connect { } } +#[macro_export] /// Sets timeout for a client connection macro_rules! 
with_timeout { ($timeout:expr, $client_op:expr) => { @@ -531,6 +554,28 @@ impl ConnectApi for Connect> { info!("client_id update to {new_id}"); Ok(new_id) } + + async fn add_learner( + &self, + request: AddLearnerRequest, + timeout: Duration, + ) -> Result, CurpError> { + let mut client = self.rpc_connect.clone(); + let mut req = tonic::Request::new(request); + req.metadata_mut().inject_current(); + with_timeout!(timeout, client.add_learner(req)).map_err(Into::into) + } + + async fn remove_learner( + &self, + request: RemoveLearnerRequest, + timeout: Duration, + ) -> Result, CurpError> { + let mut client = self.rpc_connect.clone(); + let mut req = tonic::Request::new(request); + req.metadata_mut().inject_current(); + with_timeout!(timeout, client.remove_learner(req)).map_err(Into::into) + } } #[allow(clippy::let_and_return)] // for metrics @@ -632,8 +677,9 @@ impl InnerConnectApi for Connect> { } } -/// A connect api implementation which bypass kernel to dispatch method directly. -pub(crate) struct BypassedConnect { +/// A connect api implementation which bypass kernel to dispatch method +/// directly. 
+pub(crate) struct BypassedConnect { /// inner server server: T, /// server id @@ -824,6 +870,28 @@ where Ok(new_id) } + + async fn add_learner( + &self, + request: AddLearnerRequest, + _timeout: Duration, + ) -> Result, CurpError> { + let mut req = tonic::Request::new(request); + req.metadata_mut().inject_bypassed(); + req.metadata_mut().inject_current(); + self.server.add_learner(req).await.map_err(Into::into) + } + + async fn remove_learner( + &self, + request: RemoveLearnerRequest, + _timeout: Duration, + ) -> Result, CurpError> { + let mut req = tonic::Request::new(request); + req.metadata_mut().inject_bypassed(); + req.metadata_mut().inject_current(); + self.server.remove_learner(req).await.map_err(Into::into) + } } /// Generate heartbeat stream @@ -889,8 +957,10 @@ fn install_snapshot_stream( #[cfg(test)] mod tests { use bytes::Bytes; - use engine::{EngineType, Snapshot as EngineSnapshot}; - use futures::{pin_mut, StreamExt}; + use engine::EngineType; + use engine::Snapshot as EngineSnapshot; + use futures::pin_mut; + use futures::StreamExt; use test_macros::abort_on_panic; use tracing_test::traced_test; diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index d561e27bf..095a30c11 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -27,6 +27,8 @@ pub use self::proto::{ propose_conf_change_request::{ConfChange, ConfChangeType}, protocol_client, protocol_server::{Protocol, ProtocolServer}, + AddLearnerRequest, + AddLearnerResponse, CmdResult, FetchClusterRequest, FetchClusterResponse, @@ -49,6 +51,8 @@ pub use self::proto::{ ReadIndexResponse, RecordRequest, RecordResponse, + RemoveLearnerRequest, + RemoveLearnerResponse, ShutdownRequest, ShutdownResponse, SyncedResponse, @@ -667,6 +671,11 @@ impl CurpError { Self::Internal(reason.into()) } + /// `InvalidMemberChange` error + pub(crate) fn invalid_member_change() -> Self { + Self::InvalidMemberChange(()) + } + /// Whether to abort fast round early pub(crate) fn 
should_abort_fast_round(&self) -> bool {
        matches!(
@@ -709,7 +718,8 @@ impl CurpError {
             | CurpError::ExpiredClientId(())
             | CurpError::Redirect(_)
             | CurpError::WrongClusterVersion(())
-            | CurpError::Zombie(()) => CurpErrorPriority::High,
+            | CurpError::Zombie(())
+            | CurpError::InvalidMemberChange(()) => CurpErrorPriority::High,
             CurpError::RpcTransport(())
             | CurpError::Internal(_)
             | CurpError::KeyConflict(())
@@ -816,6 +826,10 @@ impl From for tonic::Status {
                 tonic::Code::FailedPrecondition,
                 "Zombie leader error: The leader is a zombie with outdated term.",
             ),
+            CurpError::InvalidMemberChange(()) => (
+                tonic::Code::FailedPrecondition,
+                "Invalid membership change error: The requested change is invalid.",
+            ),
         };
 
         let details = CurpErrorWrapper { err: Some(err) }.encode_to_vec();

diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs
index a52af876a..22b6b4684 100644
--- a/crates/curp/src/rpc/reconnect.rs
+++ b/crates/curp/src/rpc/reconnect.rs
@@ -7,11 +7,12 @@ use futures::Stream;
 use crate::{
     members::ServerId,
     rpc::{
-        connect::ConnectApi, CurpError, FetchClusterRequest, FetchClusterResponse,
-        FetchReadStateRequest, FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse,
-        OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeRequest,
-        PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, RecordResponse,
-        ShutdownRequest, ShutdownResponse,
+        connect::ConnectApi, AddLearnerRequest, AddLearnerResponse, CurpError, FetchClusterRequest,
+        FetchClusterResponse, FetchReadStateRequest, FetchReadStateResponse, MoveLeaderRequest,
+        MoveLeaderResponse, OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse,
+        ProposeRequest, PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest,
+        RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, ShutdownRequest,
+        ShutdownResponse,
     },
 };
 
@@ -181,4 +182,22 @@ impl ConnectApi for Reconnect {
             _empty = self.event.listen() => 
Err(CurpError::RpcTransport(())), } } + + /// Add a learner to the cluster. + async fn add_learner( + &self, + request: AddLearnerRequest, + timeout: Duration, + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::add_learner, request, timeout) + } + + /// Remove a learner from the cluster. + async fn remove_learner( + &self, + request: RemoveLearnerRequest, + timeout: Duration, + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::remove_learner, request, timeout) + } } diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs new file mode 100644 index 000000000..f752d7145 --- /dev/null +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -0,0 +1,35 @@ +#![allow( + clippy::unused_self, + clippy::unimplemented, + clippy::needless_pass_by_value +)] // TODO: remove this after implemented + +use curp_external_api::cmd::Command; +use curp_external_api::cmd::CommandExecutor; +use curp_external_api::role_change::RoleChange; + +use crate::rpc::AddLearnerRequest; +use crate::rpc::AddLearnerResponse; +use crate::rpc::CurpError; +use crate::rpc::RemoveLearnerRequest; +use crate::rpc::RemoveLearnerResponse; + +use super::CurpNode; + +impl, RC: RoleChange> CurpNode { + /// Adds a learner to the cluster + pub(crate) fn add_learner( + &self, + _request: AddLearnerRequest, + ) -> Result { + unimplemented!() + } + + /// Removes a learner from the cluster + pub(crate) fn remove_learner( + &self, + _request: RemoveLearnerRequest, + ) -> Result { + unimplemented!() + } +} diff --git a/crates/curp/src/server/curp_node.rs b/crates/curp/src/server/curp_node/mod.rs similarity index 98% rename from crates/curp/src/server/curp_node.rs rename to crates/curp/src/server/curp_node/mod.rs index 95a4d15f4..504565920 100644 --- a/crates/curp/src/server/curp_node.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -64,6 +64,9 @@ use crate::{ snapshot::{Snapshot, SnapshotMeta}, }; +/// `CurpNode` member 
implementation +mod member_impl; + /// After sync entry, composed of a log entry and response sender pub(crate) type AfterSyncEntry = (Arc>, Option>); @@ -636,7 +639,8 @@ impl, RC: RoleChange> CurpNode { #[allow(clippy::arithmetic_side_effects, clippy::ignored_unit_patterns)] async fn election_task(curp: Arc>, shutdown_listener: Listener) { let heartbeat_interval = curp.cfg().heartbeat_interval; - // wait for some random time before tick starts to minimize vote split possibility + // wait for some random time before tick starts to minimize vote split + // possibility let rand = thread_rng() .gen_range(0..heartbeat_interval.as_millis()) .numeric_cast(); @@ -653,8 +657,9 @@ impl, RC: RoleChange> CurpNode { } } if let Some(pre_vote_or_vote) = curp.tick_election() { - // bcast pre vote or vote, if it is a pre vote and success, it will return Some(vote) - // then we need to bcast normal vote, and bcast normal vote always return None + // bcast pre vote or vote, if it is a pre vote and success, it will return + // Some(vote) then we need to bcast normal vote, and bcast + // normal vote always return None if let Some(vote) = Self::bcast_vote(curp.as_ref(), pre_vote_or_vote.clone()).await { debug_assert!( @@ -763,7 +768,8 @@ impl, RC: RoleChange> CurpNode { let mut is_shutdown_state = false; let mut ae_fail_count = 0; loop { - // a sync is either triggered by an heartbeat timeout event or when new log entries arrive + // a sync is either triggered by an heartbeat timeout event or when new log + // entries arrive tokio::select! { state = shutdown_listener.wait_state(), if !is_shutdown_state => { match state { @@ -1138,7 +1144,8 @@ impl, RC: RoleChange> CurpNode { }); // (hb_opt, entries) status combination // (false, empty) => send heartbeat to followers - // (true, empty) => indicates that `batch_timeout` expired, and during this period there is not any log generated. 
Do nothing + // (true, empty) => indicates that `batch_timeout` expired, and during this + // period there is not any log generated. Do nothing // (true | false, not empty) => send append entries if !*hb_opt || !is_empty { match Self::send_ae(connect, curp, ae).await { @@ -1169,9 +1176,12 @@ impl, RC: RoleChange> CurpNode { debug!("ae rejected by {}", connect.id()); } // Check Follower shutdown - // When the leader is in the shutdown state, its last log must be shutdown, and if the follower is - // already synced with leader and current AE is a heartbeat, then the follower will commit the shutdown - // log after AE, or when the follower is not synced with the leader, the current AE will send and directly commit + // When the leader is in the shutdown state, its last log must be + // shutdown, and if the follower is + // already synced with leader and current AE is a heartbeat, then the + // follower will commit the shutdown + // log after AE, or when the follower is not synced with the leader, the + // current AE will send and directly commit // shutdown log. 
if is_shutdown_state && ((curp.is_synced(connect_id) && is_empty) diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index d2dc8d99f..1b60a7423 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -1,4 +1,5 @@ -use std::{fmt::Debug, sync::Arc}; +use std::fmt::Debug; +use std::sync::Arc; use engine::SnapshotAllocator; use flume::r#async::RecvStream; @@ -7,34 +8,56 @@ use tokio::sync::broadcast; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; use tracing::instrument; +use utils::config::CurpConfig; +use utils::task_manager::TaskManager; +use utils::tracing::Extract; #[cfg(madsim)] use utils::ClientTlsConfig; -use utils::{config::CurpConfig, task_manager::TaskManager, tracing::Extract}; +pub use self::conflict::spec_pool_new::SpObject; +pub use self::conflict::uncommitted_pool::UcpObject; use self::curp_node::CurpNode; -pub use self::{ - conflict::{spec_pool_new::SpObject, uncommitted_pool::UcpObject}, - raw_curp::RawCurp, -}; -use crate::rpc::{OpResponse, RecordRequest, RecordResponse}; -use crate::{ - cmd::{Command, CommandExecutor}, - members::{ClusterInfo, ServerId}, - role_change::RoleChange, - rpc::{ - connect::Bypass, AppendEntriesRequest, AppendEntriesResponse, FetchClusterRequest, - FetchClusterResponse, FetchReadStateRequest, FetchReadStateResponse, - InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, - MoveLeaderResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeRequest, - PublishRequest, PublishResponse, ShutdownRequest, ShutdownResponse, TriggerShutdownRequest, - TriggerShutdownResponse, TryBecomeLeaderNowRequest, TryBecomeLeaderNowResponse, - VoteRequest, VoteResponse, - }, -}; -use crate::{ - response::ResponseSender, - rpc::{ReadIndexRequest, ReadIndexResponse}, -}; +pub use self::raw_curp::RawCurp; +use crate::cmd::Command; +use crate::cmd::CommandExecutor; +use crate::members::ClusterInfo; +use crate::members::ServerId; +use 
crate::response::ResponseSender; +use crate::role_change::RoleChange; +use crate::rpc::connect::Bypass; +use crate::rpc::AddLearnerRequest; +use crate::rpc::AddLearnerResponse; +use crate::rpc::AppendEntriesRequest; +use crate::rpc::AppendEntriesResponse; +use crate::rpc::FetchClusterRequest; +use crate::rpc::FetchClusterResponse; +use crate::rpc::FetchReadStateRequest; +use crate::rpc::FetchReadStateResponse; +use crate::rpc::InstallSnapshotRequest; +use crate::rpc::InstallSnapshotResponse; +use crate::rpc::LeaseKeepAliveMsg; +use crate::rpc::MoveLeaderRequest; +use crate::rpc::MoveLeaderResponse; +use crate::rpc::OpResponse; +use crate::rpc::ProposeConfChangeRequest; +use crate::rpc::ProposeConfChangeResponse; +use crate::rpc::ProposeRequest; +use crate::rpc::PublishRequest; +use crate::rpc::PublishResponse; +use crate::rpc::ReadIndexRequest; +use crate::rpc::ReadIndexResponse; +use crate::rpc::RecordRequest; +use crate::rpc::RecordResponse; +use crate::rpc::RemoveLearnerRequest; +use crate::rpc::RemoveLearnerResponse; +use crate::rpc::ShutdownRequest; +use crate::rpc::ShutdownResponse; +use crate::rpc::TriggerShutdownRequest; +use crate::rpc::TriggerShutdownResponse; +use crate::rpc::TryBecomeLeaderNowRequest; +use crate::rpc::TryBecomeLeaderNowResponse; +use crate::rpc::VoteRequest; +use crate::rpc::VoteResponse; /// Command worker to do execution and after sync mod cmd_worker; @@ -63,14 +86,17 @@ mod lease_manager; /// Curp metrics mod metrics; -pub use storage::{db::DB, StorageApi, StorageError}; +pub use storage::db::DB; +pub use storage::StorageApi; +pub use storage::StorageError; /// The Rpc Server to handle rpc requests /// /// This Wrapper is introduced due to the `MadSim` rpc lib #[derive(Debug)] pub struct Rpc, RC: RoleChange> { - /// The inner server is wrapped in an Arc so that its state can be shared while cloning the rpc wrapper + /// The inner server is wrapped in an Arc so that its state can be shared + /// while cloning the rpc wrapper inner: 
Arc>, } @@ -199,6 +225,28 @@ impl, RC: RoleChange> crate::rpc::Protocol fo self.inner.lease_keep_alive(req_stream).await?, )) } + + #[instrument(skip_all, name = "add_learner")] + async fn add_learner( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + self.inner + .add_learner(request.into_inner()) + .map(tonic::Response::new) + .map_err(Into::into) + } + + #[instrument(skip_all, name = "remove_learner")] + async fn remove_learner( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + self.inner + .remove_learner(request.into_inner()) + .map(tonic::Response::new) + .map_err(Into::into) + } } #[tonic::async_trait] @@ -329,7 +377,8 @@ impl, RC: RoleChange> Rpc { } } - /// Run a new rpc server on a specific addr, designed to be used in the tests + /// Run a new rpc server on a specific addr, designed to be used in the + /// tests /// /// # Errors /// @@ -354,7 +403,9 @@ impl, RC: RoleChange> Rpc { ) -> Result<(), crate::error::ServerError> { use utils::task_manager::tasks::TaskName; - use crate::rpc::{InnerProtocolServer, ProtocolServer}; + use crate::rpc::InnerProtocolServer; + use crate::rpc::MemberProtocolServer; + use crate::rpc::ProtocolServer; let n = task_manager .get_shutdown_listener(TaskName::TonicServer) @@ -375,6 +426,7 @@ impl, RC: RoleChange> Rpc { tonic::transport::Server::builder() .add_service(ProtocolServer::new(server.clone())) + .add_service(MemberProtocolServer::new(server.clone())) .add_service(InnerProtocolServer::new(server)) .serve_with_shutdown(addr, n.wait()) .await?; diff --git a/crates/xline/src/server/auth_wrapper.rs b/crates/xline/src/server/auth_wrapper.rs index 1df9d65d0..9f5c3a135 100644 --- a/crates/xline/src/server/auth_wrapper.rs +++ b/crates/xline/src/server/auth_wrapper.rs @@ -3,11 +3,12 @@ use std::sync::Arc; use curp::{ cmd::PbCodec, rpc::{ - FetchClusterRequest, FetchClusterResponse, FetchReadStateRequest, FetchReadStateResponse, - LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, 
OpResponse, - ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeRequest, Protocol, - PublishRequest, PublishResponse, ReadIndexRequest, ReadIndexResponse, RecordRequest, - RecordResponse, ShutdownRequest, ShutdownResponse, + AddLearnerRequest, AddLearnerResponse, FetchClusterRequest, FetchClusterResponse, + FetchReadStateRequest, FetchReadStateResponse, LeaseKeepAliveMsg, MoveLeaderRequest, + MoveLeaderResponse, OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, + ProposeRequest, Protocol, PublishRequest, PublishResponse, ReadIndexRequest, + ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, + RemoveLearnerResponse, ShutdownRequest, ShutdownResponse, }, }; use flume::r#async::RecvStream; @@ -18,6 +19,7 @@ use super::xline_server::CurpServer; use crate::storage::AuthStore; /// Auth wrapper +#[derive(Clone)] pub(crate) struct AuthWrapper { /// Curp server curp_server: CurpServer, @@ -120,4 +122,18 @@ impl Protocol for AuthWrapper { ) -> Result, tonic::Status> { self.curp_server.lease_keep_alive(request).await } + + async fn add_learner( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + self.curp_server.add_learner(request).await + } + + async fn remove_learner( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + self.curp_server.remove_learner(request).await + } } diff --git a/crates/xline/src/server/xline_server.rs b/crates/xline/src/server/xline_server.rs index 182c0c4ad..0ece5cdc6 100644 --- a/crates/xline/src/server/xline_server.rs +++ b/crates/xline/src/server/xline_server.rs @@ -318,7 +318,7 @@ impl XlineServer { .add_service(RpcWatchServer::new(watch_server)) .add_service(RpcMaintenanceServer::new(maintenance_server)) .add_service(RpcClusterServer::new(cluster_server)) - .add_service(ProtocolServer::new(auth_wrapper)); + .add_service(ProtocolServer::new(auth_wrapper.clone())); let curp_router = builder .add_service(ProtocolServer::new(curp_server.clone())) 
.add_service(InnerProtocolServer::new(curp_server)); From d301c6363711d3b63cf4c4c660ddd24658959220 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 24 Jul 2024 10:54:08 +0800 Subject: [PATCH 132/322] feat: implement learner requests on curp server refactor: implement `commit` and `truncate` in `MembershipState` --- crates/curp/src/lib.rs | 6 +- crates/curp/src/log_entry/entry_data.rs | 51 +++++ .../src/{log_entry.rs => log_entry/mod.rs} | 49 +---- crates/curp/src/member.rs | 113 +++++++++++ crates/curp/src/server/cmd_worker/mod.rs | 3 + .../curp/src/server/curp_node/member_impl.rs | 25 ++- crates/curp/src/server/mod.rs | 2 + .../curp/src/server/raw_curp/member_impl.rs | 80 ++++++++ crates/curp/src/server/raw_curp/mod.rs | 192 +++++++++++------- crates/utils/src/barrier/id.rs | 5 +- 10 files changed, 399 insertions(+), 127 deletions(-) create mode 100644 crates/curp/src/log_entry/entry_data.rs rename crates/curp/src/{log_entry.rs => log_entry/mod.rs} (58%) create mode 100644 crates/curp/src/member.rs create mode 100644 crates/curp/src/server/raw_curp/member_impl.rs diff --git a/crates/curp/src/lib.rs b/crates/curp/src/lib.rs index e5e5111b6..868e763b6 100644 --- a/crates/curp/src/lib.rs +++ b/crates/curp/src/lib.rs @@ -206,6 +206,9 @@ mod snapshot; /// Propose response sender mod response; +/// Membership state +mod member; + /// Calculate the super quorum #[inline] #[must_use] @@ -223,7 +226,8 @@ fn quorum(size: usize) -> usize { size / 2 + 1 } -/// Calculate the `recover_quorum`: the smallest number of servers who must contain a command in speculative pool for it to be recovered +/// Calculate the `recover_quorum`: the smallest number of servers who must +/// contain a command in speculative pool for it to be recovered #[inline] #[must_use] #[allow(clippy::arithmetic_side_effects)] // it's safe diff --git a/crates/curp/src/log_entry/entry_data.rs b/crates/curp/src/log_entry/entry_data.rs new file mode 100644 index 
000000000..2b7409ebb --- /dev/null +++ b/crates/curp/src/log_entry/entry_data.rs @@ -0,0 +1,51 @@ +use std::sync::Arc; + +use serde::Deserialize; +use serde::Serialize; + +use crate::members::ServerId; +use crate::rpc::ConfChange; +use crate::rpc::PublishRequest; +use crate::member::Membership; + +/// Entry data of a `LogEntry` +#[derive(Debug, Clone, Serialize, Deserialize)] +#[cfg_attr(test, derive(PartialEq))] +pub(crate) enum EntryData { + /// Empty entry + Empty, + /// `Command` entry + Command(Arc), + /// `ConfChange` entry + ConfChange(Vec), + /// `Shutdown` entry + Shutdown, + /// `SetNodeState` entry + SetNodeState(ServerId, String, Vec), + /// `Member` entry + Member(Membership), +} + +impl From> for EntryData { + fn from(cmd: Arc) -> Self { + EntryData::Command(cmd) + } +} + +impl From> for EntryData { + fn from(value: Vec) -> Self { + Self::ConfChange(value) + } +} + +impl From for EntryData { + fn from(value: PublishRequest) -> Self { + EntryData::SetNodeState(value.node_id, value.name, value.client_urls) + } +} + +impl From for EntryData { + fn from(value: Membership) -> Self { + EntryData::Member(value) + } +} diff --git a/crates/curp/src/log_entry.rs b/crates/curp/src/log_entry/mod.rs similarity index 58% rename from crates/curp/src/log_entry.rs rename to crates/curp/src/log_entry/mod.rs index 96ba66d8d..f89ebfdcd 100644 --- a/crates/curp/src/log_entry.rs +++ b/crates/curp/src/log_entry/mod.rs @@ -1,15 +1,14 @@ -use std::{ - hash::{Hash, Hasher}, - sync::Arc, -}; +use std::hash::{Hash, Hasher}; use curp_external_api::{cmd::Command, InflightId, LogIndex}; use serde::{Deserialize, Serialize}; -use crate::{ - members::ServerId, - rpc::{ConfChange, ProposeId, PublishRequest}, -}; +use crate::rpc::ProposeId; + +pub(crate) use entry_data::EntryData; + +/// Definition of different entry data types +mod entry_data; /// Log entry #[derive(Debug, Clone, Serialize, Deserialize)] @@ -25,40 +24,6 @@ pub struct LogEntry { pub(crate) entry_data: EntryData, } -/// 
Entry data of a `LogEntry` -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(test, derive(PartialEq))] -pub(crate) enum EntryData { - /// Empty entry - Empty, - /// `Command` entry - Command(Arc), - /// `ConfChange` entry - ConfChange(Vec), - /// `Shutdown` entry - Shutdown, - /// `SetNodeState` entry - SetNodeState(ServerId, String, Vec), -} - -impl From> for EntryData { - fn from(cmd: Arc) -> Self { - EntryData::Command(cmd) - } -} - -impl From> for EntryData { - fn from(value: Vec) -> Self { - Self::ConfChange(value) - } -} - -impl From for EntryData { - fn from(value: PublishRequest) -> Self { - EntryData::SetNodeState(value.node_id, value.name, value.client_urls) - } -} - impl LogEntry where C: Command, diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs new file mode 100644 index 000000000..1b5bb5a4e --- /dev/null +++ b/crates/curp/src/member.rs @@ -0,0 +1,113 @@ +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::collections::HashSet; + +use curp_external_api::LogIndex; +use serde::Deserialize; +use serde::Serialize; + +/// Membership state stored in current node +#[derive(Debug, Default)] +pub(crate) struct MembershipState { + /// Config that exist in log, but haven't committed + effective: Membership, + /// Index of the effective membership + index_effective: LogIndex, + /// Committed membership config + committed: Membership, +} + +#[allow(unused)] +impl MembershipState { + /// Update the effective membership + pub(crate) fn update_effective(&mut self, config: Membership) { + self.effective = config; + } + + /// Update the committed membership + pub(crate) fn update_commit(&mut self, config: Membership) { + self.committed = config; + } + + /// Append a membership change entry + pub(crate) fn append(&mut self, index: LogIndex, membership: Membership) { + self.index_effective = index; + self.effective = membership; + } + + /// Commit a membership index + pub(crate) fn commit(&mut self, at: LogIndex) { 
+ if at >= self.index_effective { + self.committed = self.effective.clone(); + } + } + + /// Truncate at the give log index + pub(crate) fn truncate(&mut self, at: LogIndex) { + if at < self.index_effective { + self.effective = self.committed.clone(); + self.index_effective = at; + } + } + + /// Returns the committed membership + pub(crate) fn committed(&self) -> &Membership { + &self.committed + } + + /// Returns the effective membership + pub(crate) fn effective(&self) -> &Membership { + &self.effective + } +} + +/// Membership config +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +#[cfg_attr(test, derive(PartialEq))] +pub(crate) struct Membership { + /// Member of the cluster + members: Vec>, + /// All Nodes, including members and learners + nodes: HashMap, +} + +impl Membership { + /// Generates a new membership from `Change` + /// + /// Returns `None` if the change is invalid + pub(crate) fn change(&self, change: Change) -> Option { + match change { + Change::AddLearner(learners) => { + let members = self.members.clone(); + let mut nodes = self.nodes.clone(); + for (id, addr) in learners { + match nodes.entry(id) { + Entry::Occupied(_) => return None, + Entry::Vacant(e) => { + let _ignore = e.insert(addr); + } + } + } + + Some(Self { members, nodes }) + } + Change::RemoveLearner(ids) => { + let members = self.members.clone(); + let mut nodes = self.nodes.clone(); + for id in ids { + let _ignore = nodes.remove(&id)?; + } + + Some(Self { members, nodes }) + } + } + } +} + +/// The change of membership +pub(crate) enum Change { + /// Adds learners + AddLearner(Vec<(u64, String)>), + /// Removes learners + RemoveLearner(Vec), +} diff --git a/crates/curp/src/server/cmd_worker/mod.rs b/crates/curp/src/server/cmd_worker/mod.rs index 7ac5307b0..c2fbfe714 100644 --- a/crates/curp/src/server/cmd_worker/mod.rs +++ b/crates/curp/src/server/cmd_worker/mod.rs @@ -224,9 +224,12 @@ async fn after_sync_others, RC: RoleChange>( } // The no-op command has been applied 
to state machine (EntryData::Empty, _) => curp.set_no_op_applied(), + (EntryData::Member(config), _) => curp.commit_membership(config.clone()), + _ => unreachable!(), } ce.trigger(entry.inflight_id()); + curp.trigger(&entry.propose_id); debug!("{id} cmd({}) after sync is called", entry.propose_id); } } diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index f752d7145..babc716f7 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -18,18 +18,31 @@ use super::CurpNode; impl, RC: RoleChange> CurpNode { /// Adds a learner to the cluster - pub(crate) fn add_learner( + pub(crate) async fn add_learner( &self, - _request: AddLearnerRequest, + request: AddLearnerRequest, ) -> Result { - unimplemented!() + let addrs = request.node_addrs; + let ret = self.curp.add_learner(&addrs); + self.curp.wait_propose_ids(Some(ret.propose_id())).await; + + Ok(AddLearnerResponse { + node_ids: ret.into_inner(), + }) } /// Removes a learner from the cluster - pub(crate) fn remove_learner( + pub(crate) async fn remove_learner( &self, - _request: RemoveLearnerRequest, + request: RemoveLearnerRequest, ) -> Result { - unimplemented!() + let node_ids = request.node_ids; + let ret = self + .curp + .remove_learner(node_ids) + .ok_or(CurpError::invalid_member_change())?; + self.curp.wait_propose_ids(Some(ret.propose_id())).await; + + Ok(RemoveLearnerResponse {}) } } diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 1b60a7423..0f1286527 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -233,6 +233,7 @@ impl, RC: RoleChange> crate::rpc::Protocol fo ) -> Result, tonic::Status> { self.inner .add_learner(request.into_inner()) + .await .map(tonic::Response::new) .map_err(Into::into) } @@ -244,6 +245,7 @@ impl, RC: RoleChange> crate::rpc::Protocol fo ) -> Result, tonic::Status> { self.inner 
.remove_learner(request.into_inner()) + .await .map(tonic::Response::new) .map_err(Into::into) } diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs new file mode 100644 index 000000000..321416e5c --- /dev/null +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -0,0 +1,80 @@ +use curp_external_api::cmd::Command; +use curp_external_api::role_change::RoleChange; +use rand::Rng; + +use crate::member::Change; +use crate::member::Membership; +use crate::rpc::ProposeId; + +use super::RawCurp; + +impl RawCurp { + /// Adds a learner to the membership state + pub(crate) fn add_learner(&self, addrs: &[String]) -> ReturnValueWrapper> { + let mut ms_w = self.ms.write(); + let mut log_w = self.log.write(); + loop { + let ids = random_ids(addrs.len()); + let change = ids.clone().into_iter().zip(addrs.to_owned()).collect(); + let Some(config) = ms_w.committed().change(Change::AddLearner(change)) else { + continue; + }; + ms_w.update_effective(config.clone()); + let st_r = self.st.read(); + let propose_id = ProposeId(rand::random(), 0); + let _entry = log_w.push(st_r.term, propose_id, config); + return ReturnValueWrapper::new(ids, propose_id); + } + } + + /// Removes a learner from the membership state + pub(crate) fn remove_learner(&self, ids: Vec) -> Option> { + let mut ms_w = self.ms.write(); + let mut log_w = self.log.write(); + let config = ms_w.committed().change(Change::RemoveLearner(ids))?; + ms_w.update_effective(config.clone()); + let st_r = self.st.read(); + let propose_id = ProposeId(rand::random(), 0); + let _entry = log_w.push(st_r.term, propose_id, config); + Some(ReturnValueWrapper::new((), propose_id)) + } + + /// Updates the committed membership + pub(crate) fn commit_membership(&self, config: Membership) { + let mut ms_w = self.ms.write(); + ms_w.update_commit(config); + } +} + +/// Wrapper for the return value of the raw curp methods +/// +/// It wraps the actual return value and the propose id of 
the request +pub(crate) struct ReturnValueWrapper { + /// The actual return value + value: T, + /// The propose id of the request + propose_id: ProposeId, +} + +impl ReturnValueWrapper { + /// Creates a new return value wrapper + pub(crate) fn new(value: T, propose_id: ProposeId) -> Self { + Self { value, propose_id } + } + + /// Returns the propose id of the request + pub(crate) fn propose_id(&self) -> ProposeId { + self.propose_id + } + + /// Unwraps the return value + pub(crate) fn into_inner(self) -> T { + self.value + } +} + +/// Generate random ids of the given length +fn random_ids(n: usize) -> Vec { + let mut rng = rand::thread_rng(); + (0..n).map(|_| rng.gen()).collect() +} diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index b6f529c12..bbe10e44d 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1,6 +1,7 @@ //! READ THIS BEFORE YOU START WRITING CODE FOR THIS MODULE //! To avoid deadlock, let's make some rules: -//! 1. To group similar functions, I divide Curp impl into three scope: one for utils(don't grab lock here), one for tick, one for handlers +//! 1. To group similar functions, I divide Curp impl into three scope: one for +//! utils(don't grab lock here), one for tick, one for handlers //! 2. Lock order should be: //! 1. self.st //! 2. 
self.cst @@ -9,73 +10,86 @@ #![allow(clippy::similar_names)] // st, lst, cst is similar but not confusing #![allow(clippy::arithmetic_side_effects)] // u64 is large enough and won't overflow -use std::{ - cmp::{self, min}, - collections::{HashMap, HashSet}, - fmt::Debug, - sync::{ - atomic::{AtomicU64, AtomicU8, Ordering}, - Arc, - }, -}; - -use clippy_utilities::{NumericCast, OverflowArithmetic}; +use std::cmp; +use std::cmp::min; +use std::collections::HashMap; +use std::collections::HashSet; +use std::fmt::Debug; +use std::sync::atomic::AtomicU64; +use std::sync::atomic::AtomicU8; +use std::sync::atomic::Ordering; +use std::sync::Arc; + +use clippy_utilities::NumericCast; +use clippy_utilities::OverflowArithmetic; use dashmap::DashMap; use derive_builder::Builder; use event_listener::Event; use futures::Future; use itertools::Itertools; use opentelemetry::KeyValue; -use parking_lot::{Mutex, RwLock, RwLockUpgradableReadGuard, RwLockWriteGuard}; -use tokio::sync::{broadcast, oneshot}; +use parking_lot::Mutex; +use parking_lot::RwLock; +use parking_lot::RwLockUpgradableReadGuard; +use parking_lot::RwLockWriteGuard; +use tokio::sync::broadcast; +use tokio::sync::oneshot; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; -use tracing::{ - debug, error, - log::{log_enabled, Level}, - trace, warn, -}; +use tracing::debug; +use tracing::error; +use tracing::log::log_enabled; +use tracing::log::Level; +use tracing::trace; +use tracing::warn; +use utils::barrier::IdBarrier; +use utils::config::CurpConfig; +use utils::parking_lot_lock::MutexMap; +use utils::parking_lot_lock::RwLockMap; +use utils::task_manager::TaskManager; #[cfg(madsim)] use utils::ClientTlsConfig; -use utils::{ - barrier::IdBarrier, - config::CurpConfig, - parking_lot_lock::{MutexMap, RwLockMap}, - task_manager::TaskManager, -}; - -use self::{ - log::Log, - state::{CandidateState, LeaderState, State}, -}; -use super::{ - cmd_board::CommandBoard, - conflict::{spec_pool_new::SpeculativePool, 
uncommitted_pool::UncommittedPool}, - curp_node::TaskType, - lease_manager::LeaseManagerRef, - storage::StorageApi, - DB, -}; -use crate::{ - cmd::Command, - log_entry::{EntryData, LogEntry}, - members::{ClusterInfo, ServerId}, - quorum, recover_quorum, - response::ResponseSender, - role_change::RoleChange, - rpc::{ - connect::{InnerConnectApi, InnerConnectApiWrapper}, - ConfChange, ConfChangeType, CurpError, IdSet, Member, PoolEntry, ProposeId, PublishRequest, - ReadState, Redirect, - }, - server::{ - cmd_board::CmdBoardRef, - metrics, - raw_curp::{log::FallbackContext, state::VoteResult}, - }, - snapshot::{Snapshot, SnapshotMeta}, - LogIndex, -}; + +use self::log::Log; +use self::state::CandidateState; +use self::state::LeaderState; +use self::state::State; +use super::cmd_board::CommandBoard; +use super::conflict::spec_pool_new::SpeculativePool; +use super::conflict::uncommitted_pool::UncommittedPool; +use super::curp_node::TaskType; +use super::lease_manager::LeaseManagerRef; +use super::storage::StorageApi; +use super::DB; +use crate::cmd::Command; +use crate::log_entry::EntryData; +use crate::log_entry::LogEntry; +use crate::member::MembershipState; +use crate::members::ClusterInfo; +use crate::members::ServerId; +use crate::quorum; +use crate::recover_quorum; +use crate::response::ResponseSender; +use crate::role_change::RoleChange; +use crate::rpc::connect::InnerConnectApi; +use crate::rpc::connect::InnerConnectApiWrapper; +use crate::rpc::ConfChange; +use crate::rpc::ConfChangeType; +use crate::rpc::CurpError; +use crate::rpc::IdSet; +use crate::rpc::Member; +use crate::rpc::PoolEntry; +use crate::rpc::ProposeId; +use crate::rpc::PublishRequest; +use crate::rpc::ReadState; +use crate::rpc::Redirect; +use crate::server::cmd_board::CmdBoardRef; +use crate::server::metrics; +use crate::server::raw_curp::log::FallbackContext; +use crate::server::raw_curp::state::VoteResult; +use crate::snapshot::Snapshot; +use crate::snapshot::SnapshotMeta; +use 
crate::LogIndex; /// Curp state mod state; @@ -87,6 +101,9 @@ mod log; #[cfg(test)] mod tests; +/// Membership implementation +mod member_impl; + /// Default Size of channel const CHANGE_CHANNEL_SIZE: usize = 128; @@ -107,6 +124,8 @@ pub struct RawCurp { ctx: Context, /// Task manager task_manager: Arc, + /// Membership state + ms: RwLock, } /// Tmp struct for building `RawCurp` @@ -200,6 +219,7 @@ impl RawCurpBuilder { log, ctx, task_manager: args.task_manager, + ms: RwLock::default(), }; if args.is_leader { @@ -577,14 +597,14 @@ impl RawCurp { /// Persistent log entries /// - /// NOTE: A `&Log` is required because we do not want the `Log` structure gets mutated - /// during the persistent + /// NOTE: A `&Log` is required because we do not want the `Log` structure + /// gets mutated during the persistent #[allow(clippy::panic)] #[allow(dropping_references)] fn persistent_log_entries(&self, entries: &[&LogEntry], _log: &Log) { - // We panic when the log persistence fails because it likely indicates an unrecoverable error. - // Our WAL implementation does not support rollback on failure, as a file write syscall is not - // guaranteed to be atomic. + // We panic when the log persistence fails because it likely indicates an + // unrecoverable error. Our WAL implementation does not support rollback + // on failure, as a file write syscall is not guaranteed to be atomic. 
if let Err(e) = self.ctx.curp_storage.put_log_entries(entries) { panic!("log persistent failed: {e}"); } @@ -603,6 +623,14 @@ impl RawCurp { self.ctx.id_barrier.wait_all(conflict_cmds) } + /// Wait for propose id synced + pub(super) fn wait_propose_ids>( + &self, + propose_ids: Ids, + ) -> impl Future + Send { + self.ctx.id_barrier.wait_all(propose_ids) + } + /// Wait all logs in previous term have been applied to state machine pub(super) fn wait_no_op_applied(&self) -> Box + Send + Unpin> { // if the leader is at term 1, it won't commit a no-op log @@ -879,7 +907,8 @@ impl RawCurp { /// Handle `vote` /// Return `Ok(term, spec_pool)` if the vote is granted /// Return `Err(Some(term))` if the vote is rejected - /// The `Err(None)` will never be returned here, just to keep the return type consistent with the `handle_pre_vote` + /// The `Err(None)` will never be returned here, just to keep the return + /// type consistent with the `handle_pre_vote` pub(super) fn handle_vote( &self, term: u64, @@ -969,7 +998,8 @@ impl RawCurp { EntryData::Empty | EntryData::Command(_) | EntryData::Shutdown - | EntryData::SetNodeState(_, _, _) => false, + | EntryData::SetNodeState(_, _, _) + | EntryData::Member(_) => false, }); // extra check to shutdown removed node if !contains_candidate && !remove_candidate_is_not_committed { @@ -1130,7 +1160,8 @@ impl RawCurp { } /// Handle `install_snapshot` resp - /// Return Err(()) if the current node isn't a leader or current term is less than the given term + /// Return Err(()) if the current node isn't a leader or current term is + /// less than the given term pub(super) fn handle_snapshot_resp( &self, follower_id: ServerId, @@ -1263,7 +1294,8 @@ impl RawCurp { self.ctx.leader_tx.subscribe() } - /// Get `append_entries` request for `follower_id` that contains the latest log entries + /// Get `append_entries` request for `follower_id` that contains the latest + /// log entries pub(super) fn sync(&self, follower_id: ServerId) -> Option> { let 
term = { let st_r = self.st.read(); @@ -1290,9 +1322,10 @@ impl RawCurp { ) }); // TODO: buffer a local snapshot: if a follower is down for a long time, - // the leader will take a snapshot itself every time `sync` is called in effort to - // calibrate it. Since taking a snapshot will block the leader's execute workers, we should - // not take snapshot so often. A better solution would be to keep a snapshot cache. + // the leader will take a snapshot itself every time `sync` is called in effort + // to calibrate it. Since taking a snapshot will block the leader's + // execute workers, we should not take snapshot so often. A better + // solution would be to keep a snapshot cache. let meta = SnapshotMeta { last_included_index: entry.index, last_included_term: entry.term, @@ -1874,7 +1907,8 @@ impl RawCurp { EntryData::ConfChange(_) | EntryData::Shutdown | EntryData::Empty - | EntryData::SetNodeState(_, _, _) => {} + | EntryData::SetNodeState(_, _, _) + | EntryData::Member(_) => {} } } } @@ -1919,9 +1953,10 @@ impl RawCurp { /// Switch to a new config and return old member infos for fallback /// - /// FIXME: The state of `ctx.cluster_info` might be inconsistent with the log. A potential - /// fix would be to include the entire cluster info in the conf change log entry and - /// overwrite `ctx.cluster_info` when switching + /// FIXME: The state of `ctx.cluster_info` might be inconsistent with the + /// log. 
A potential fix would be to include the entire cluster info in + /// the conf change log entry and overwrite `ctx.cluster_info` when + /// switching fn switch_config(&self, conf_change: ConfChange) -> Option<(Vec, String, bool)> { let node_id = conf_change.node_id; let mut cst_l = self.cst.lock(); @@ -1987,7 +2022,8 @@ impl RawCurp { .change_tx .send(conf_change) .unwrap_or_else(|_e| unreachable!("change_rx should not be dropped")); - // TODO: We could wrap lst inside a role checking to prevent accidental lst mutation + // TODO: We could wrap lst inside a role checking to prevent accidental lst + // mutation if self.is_leader() && self .lst @@ -2055,7 +2091,8 @@ impl RawCurp { self.update_index_single_node(log_w, index, term); } - /// Process deduplication and acknowledge the `first_incomplete` for this client id + /// Process deduplication and acknowledge the `first_incomplete` for this + /// client id pub(crate) fn deduplicate( &self, ProposeId(client_id, seq_num): ProposeId, @@ -2066,7 +2103,8 @@ impl RawCurp { let mut cb_w = self.ctx.cb.write(); let tracker = cb_w.tracker(client_id); if tracker.only_record(seq_num) { - // TODO: obtain the previous ER from cmd_board and packed into CurpError::Duplicated as an entry. + // TODO: obtain the previous ER from cmd_board and packed into + // CurpError::Duplicated as an entry. return Err(CurpError::duplicated()); } if let Some(first_incomplete) = first_incomplete { diff --git a/crates/utils/src/barrier/id.rs b/crates/utils/src/barrier/id.rs index 5798af042..571e82ded 100644 --- a/crates/utils/src/barrier/id.rs +++ b/crates/utils/src/barrier/id.rs @@ -36,7 +36,10 @@ where /// Wait for a collection of ids. 
#[inline] - pub fn wait_all(&self, ids: Vec) -> impl Future + Send { + pub fn wait_all>( + &self, + ids: Ids, + ) -> impl Future + Send { let mut barriers_l = self.barriers.lock(); let listeners: FuturesOrdered<_> = ids .into_iter() From b9a21dfba5ee028c3809dd72ce067bf1fe499688 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 29 Jul 2024 18:06:15 +0800 Subject: [PATCH 133/322] feat(xline-client): implement xline member client for leaner requests --- crates/xline-client/examples/member.rs | 23 ++++++ crates/xline-client/src/clients/member.rs | 99 +++++++++++++++++++++++ crates/xline-client/src/clients/mod.rs | 4 + crates/xline-client/src/lib.rs | 15 +++- crates/xline-client/tests/it/main.rs | 1 + crates/xline-client/tests/it/member.rs | 27 +++++++ 6 files changed, 167 insertions(+), 2 deletions(-) create mode 100644 crates/xline-client/examples/member.rs create mode 100644 crates/xline-client/src/clients/member.rs create mode 100644 crates/xline-client/tests/it/member.rs diff --git a/crates/xline-client/examples/member.rs b/crates/xline-client/examples/member.rs new file mode 100644 index 000000000..d2787cc53 --- /dev/null +++ b/crates/xline-client/examples/member.rs @@ -0,0 +1,23 @@ +use anyhow::Result; +use xline_client::{Client, ClientOptions}; + +#[tokio::main] +async fn main() -> Result<()> { + // the name and address of all curp members + let curp_members = ["10.0.0.1:2379", "10.0.0.2:2379", "10.0.0.3:2379"]; + + let client = Client::connect(curp_members, ClientOptions::default()) + .await? 
+ .member_client(); + + let ids = client + .add_learner(vec!["10.0.0.4:2379".to_owned(), "10.0.0.5:2379".to_owned()]) + .await?; + + println!("got node ids of new learners: {ids:?}"); + + // Remove the previously added learners + client.remove_learner(ids).await?; + + Ok(()) +} diff --git a/crates/xline-client/src/clients/member.rs b/crates/xline-client/src/clients/member.rs new file mode 100644 index 000000000..173be4bef --- /dev/null +++ b/crates/xline-client/src/clients/member.rs @@ -0,0 +1,99 @@ +use std::sync::Arc; + +use xlineapi::command::CurpClient; + +use crate::error::Result; + +/// Client for member operations. +#[derive(Clone)] +pub struct MemberClient { + /// The client running the CURP protocol, communicate with all servers. + curp_client: Arc, +} + +impl MemberClient { + /// New `MemberClient` + #[inline] + pub(crate) fn new(curp_client: Arc) -> Self { + Self { curp_client } + } + + /// Adds some learners to the cluster. + /// + /// # Errors + /// + /// This function will return an error if the inner CURP client encountered a propose failure + /// + /// # Examples + /// + /// ```no_run + /// use anyhow::Result; + /// use xline_client::{Client, ClientOptions}; + /// + /// #[tokio::main] + /// async fn main() -> Result<()> { + /// // the name and address of all curp members + /// let curp_members = ["10.0.0.1:2379", "10.0.0.2:2379", "10.0.0.3:2379"]; + /// + /// let mut client = Client::connect(curp_members, ClientOptions::default()) + /// .await? + /// .member_client(); + /// + /// let ids = client + /// .add_learner(vec!["10.0.0.4:2379".to_owned(), "10.0.0.5:2379".to_owned()]) + /// .await?; + /// + /// println!("got node ids of new learners: {ids:?}"); + /// + /// Ok(()) + /// } + /// ``` + #[inline] + pub async fn add_learner(&self, addrs: Vec) -> Result> { + self.curp_client + .add_learner(addrs) + .await + .map_err(Into::into) + } + + /// Removes some learners from the cluster. 
+ /// + /// # Errors + /// + /// This function will return an error if the inner CURP client encountered a propose failure + /// + /// # Examples + /// + /// ```no_run + /// use anyhow::Result; + /// use xline_client::{Client, ClientOptions}; + /// + /// #[tokio::main] + /// async fn main() -> Result<()> { + /// // the name and address of all curp members + /// let curp_members = ["10.0.0.1:2379", "10.0.0.2:2379", "10.0.0.3:2379"]; + /// + /// let mut client = Client::connect(curp_members, ClientOptions::default()) + /// .await? + /// .member_client(); + /// + /// client.remove_learner(vec![0, 1, 2]).await?; + /// + /// Ok(()) + /// } + /// ``` + #[inline] + pub async fn remove_learner(&self, ids: Vec) -> Result<()> { + self.curp_client + .remove_learner(ids) + .await + .map_err(Into::into) + } +} + +impl std::fmt::Debug for MemberClient { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MemberClient").finish() + } +} diff --git a/crates/xline-client/src/clients/mod.rs b/crates/xline-client/src/clients/mod.rs index 8a2ce51b3..634b257d6 100644 --- a/crates/xline-client/src/clients/mod.rs +++ b/crates/xline-client/src/clients/mod.rs @@ -5,6 +5,7 @@ pub use kv::KvClient; pub use lease::LeaseClient; pub use lock::{LockClient, Session, Xutex}; pub use maintenance::MaintenanceClient; +pub use member::MemberClient; pub use watch::WatchClient; /// Auth client. @@ -24,5 +25,8 @@ mod maintenance; /// Watch client. mod watch; +/// New Membership client. 
+mod member; + /// Default session ttl pub const DEFAULT_SESSION_TTL: i64 = 60; diff --git a/crates/xline-client/src/lib.rs b/crates/xline-client/src/lib.rs index b3fd70ed3..7f81e4acb 100644 --- a/crates/xline-client/src/lib.rs +++ b/crates/xline-client/src/lib.rs @@ -180,7 +180,7 @@ use xlineapi::command::{Command, CurpClient}; use crate::{ clients::{ AuthClient, ClusterClient, ElectionClient, KvClient, LeaseClient, LockClient, - MaintenanceClient, WatchClient, + MaintenanceClient, MemberClient, WatchClient, }, error::XlineClientBuildError, }; @@ -214,6 +214,8 @@ pub struct Client { cluster: ClusterClient, /// Election client election: ElectionClient, + /// Member client + member: MemberClient, } impl Client { @@ -274,11 +276,12 @@ impl Client { token.clone(), id_gen, ); - let auth = AuthClient::new(curp_client, channel.clone(), token.clone()); + let auth = AuthClient::new(Arc::clone(&curp_client), channel.clone(), token.clone()); let maintenance = MaintenanceClient::new(channel.clone(), token.clone()); let cluster = ClusterClient::new(channel.clone(), token.clone()); let watch = WatchClient::new(channel, token); let election = ElectionClient::new(); + let member = MemberClient::new(curp_client); Ok(Self { kv, @@ -289,6 +292,7 @@ impl Client { watch, cluster, election, + member, }) } @@ -364,6 +368,13 @@ impl Client { pub fn election_client(&self) -> ElectionClient { self.election.clone() } + + /// Gets a member client. 
+ #[inline] + #[must_use] + pub fn member_client(&self) -> MemberClient { + self.member.clone() + } } /// Options for a client connection diff --git a/crates/xline-client/tests/it/main.rs b/crates/xline-client/tests/it/main.rs index 3d7b06394..f452e2509 100644 --- a/crates/xline-client/tests/it/main.rs +++ b/crates/xline-client/tests/it/main.rs @@ -4,4 +4,5 @@ mod kv; mod lease; mod lock; mod maintenance; +mod member; mod watch; diff --git a/crates/xline-client/tests/it/member.rs b/crates/xline-client/tests/it/member.rs new file mode 100644 index 000000000..fb570f9c9 --- /dev/null +++ b/crates/xline-client/tests/it/member.rs @@ -0,0 +1,27 @@ +use test_macros::abort_on_panic; +use xline_client::error::Result; + +use super::common::get_cluster_client; + +#[tokio::test(flavor = "multi_thread")] +#[abort_on_panic] +async fn learner_add_and_remove_are_ok() -> Result<()> { + let (_cluster, client) = get_cluster_client().await.unwrap(); + let client = client.member_client(); + + let ids = client + .add_learner(vec!["10.0.0.4:2379".to_owned(), "10.0.0.5:2379".to_owned()]) + .await + .expect("failed to add learners"); + + let added = ids.len(); + assert_eq!(added, 2, "expected 2 learners to be added, got {added}"); + + // Remove the previously added learners + client + .remove_learner(ids) + .await + .expect("failed to remove learners"); + + Ok(()) +} From b5da08b573cfd0e608e6d003071444bdbd4dfce6 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 31 Jul 2024 09:09:00 +0800 Subject: [PATCH 134/322] feat: implement joint quorum set refactor: use BTree alternative for HashSet and HashMap for quorum implementation --- crates/curp/src/lib.rs | 3 ++ crates/curp/src/member.rs | 22 ++++++--- crates/curp/src/quorum.rs | 95 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 7 deletions(-) create mode 100644 crates/curp/src/quorum.rs diff --git a/crates/curp/src/lib.rs b/crates/curp/src/lib.rs index 868e763b6..1250571c4 
100644 --- a/crates/curp/src/lib.rs +++ b/crates/curp/src/lib.rs @@ -209,6 +209,9 @@ mod response; /// Membership state mod member; +/// Quorum definitions +mod quorum; + /// Calculate the super quorum #[inline] #[must_use] diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 1b5bb5a4e..8af37e06a 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -1,11 +1,14 @@ -use std::collections::hash_map::Entry; -use std::collections::HashMap; -use std::collections::HashSet; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::hash::Hash; use curp_external_api::LogIndex; use serde::Deserialize; use serde::Serialize; +use crate::quorum::Joint; + /// Membership state stored in current node #[derive(Debug, Default)] pub(crate) struct MembershipState { @@ -62,13 +65,12 @@ impl MembershipState { } /// Membership config -#[derive(Clone, Debug, Default, Serialize, Deserialize)] -#[cfg_attr(test, derive(PartialEq))] +#[derive(Clone, Debug, Default, Serialize, Deserialize, Eq, PartialEq, Hash)] pub(crate) struct Membership { /// Member of the cluster - members: Vec>, + pub(crate) members: Vec>, /// All Nodes, including members and learners - nodes: HashMap, + pub(crate) nodes: BTreeMap, } impl Membership { @@ -102,6 +104,12 @@ impl Membership { } } } + + #[allow(unused)] + /// Converts to `Joint` + pub(crate) fn as_joint(&self) -> Joint, &[BTreeSet]> { + Joint::new(self.members.as_slice()) + } } /// The change of membership diff --git a/crates/curp/src/quorum.rs b/crates/curp/src/quorum.rs new file mode 100644 index 000000000..20bd7d17a --- /dev/null +++ b/crates/curp/src/quorum.rs @@ -0,0 +1,95 @@ +use std::collections::BTreeSet; +use std::marker::PhantomData; + +/// A joint quorum set +pub(crate) struct Joint { + /// The quorum sets + sets: I, + /// The type of the quorum set + _qs_type: PhantomData, +} + +impl Joint { + /// Create a new `Joint` + pub(crate) fn new(sets: I) -> 
Self { + Self { + sets, + _qs_type: PhantomData, + } + } +} + +/// A quorum set +pub(crate) trait QuorumSet { + /// Check if the given set of ids forms a quorum + /// + /// A quorum must contains at least f + 1 replicas + fn is_quorum(&self, ids: I) -> bool; + + /// Check if the given set of ids forms a super quorum + /// + /// A super quorum must contains at least f + ⌈f/2⌉ + 1 replicas + fn is_super_quorum(&self, ids: I) -> bool; + + /// Check if the given set of ids forms a recover quorum + /// + /// A recover quorum must contains at least ⌈f/2⌉ + 1 replicas + fn is_recover_quorum(&self, ids: I) -> bool; +} + +#[allow(clippy::arithmetic_side_effects)] +impl QuorumSet for BTreeSet +where + I: Iterator + Clone, +{ + fn is_quorum(&self, ids: I) -> bool { + let num = ids.into_iter().filter(|id| self.contains(id)).count(); + num * 2 > self.len() + } + + fn is_super_quorum(&self, ids: I) -> bool { + let num = ids.into_iter().filter(|id| self.contains(id)).count(); + num * 4 > 3 * self.len() + } + + fn is_recover_quorum(&self, ids: I) -> bool { + let num = ids.into_iter().filter(|id| self.contains(id)).count(); + num * 4 - 2 > self.len() + } +} + +impl QuorumSet for Joint> +where + I: Iterator + Clone, + QS: QuorumSet, +{ + fn is_quorum(&self, ids: I) -> bool { + self.sets.iter().all(|s| s.is_quorum(ids.clone())) + } + + fn is_super_quorum(&self, ids: I) -> bool { + self.sets.iter().all(|s| s.is_super_quorum(ids.clone())) + } + + fn is_recover_quorum(&self, ids: I) -> bool { + self.sets.iter().all(|s| s.is_recover_quorum(ids.clone())) + } +} + +impl QuorumSet for Joint +where + I: Iterator + Clone, + QS: QuorumSet, +{ + fn is_quorum(&self, ids: I) -> bool { + self.sets.iter().all(|s| s.is_quorum(ids.clone())) + } + + fn is_super_quorum(&self, ids: I) -> bool { + self.sets.iter().all(|s| s.is_super_quorum(ids.clone())) + } + + fn is_recover_quorum(&self, ids: I) -> bool { + self.sets.iter().all(|s| s.is_recover_quorum(ids.clone())) + } +} From 
98e6925dae66ad8d3812b4281be7fe09c8a93872 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 10 Sep 2024 21:21:42 +0800 Subject: [PATCH 135/322] feat(curp/client): implement client propose of new membership change state Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 56 +++++++++++++++++++- crates/curp/src/client/unary/propose_impl.rs | 46 ++++++++-------- crates/curp/src/lib.rs | 10 ---- crates/curp/src/member.rs | 10 ++++ crates/curp/src/quorum.rs | 6 +-- 5 files changed, 89 insertions(+), 39 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 2fb07d8c7..85d927d64 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -1,9 +1,11 @@ use std::{collections::HashMap, sync::Arc}; -use futures::{stream::FuturesUnordered, Future}; +use futures::{stream::FuturesUnordered, Future, FutureExt, StreamExt}; use crate::{ + member::Membership, members::ServerId, + quorum::QuorumSet, rpc::{connect::ConnectApi, connects, CurpError}, }; @@ -73,6 +75,8 @@ impl std::fmt::Debug for ClusterStateInit { /// The cluster state that is ready for client propose #[derive(Clone, Default)] pub(crate) struct ClusterStateReady { + /// The membership state + membership: Membership, /// Leader id. leader: ServerId, /// Term, initialize to 0, calibrated by the server. @@ -112,6 +116,7 @@ impl ClusterStateReady { connects: HashMap>, ) -> Self { Self { + membership: Membership::default(), // FIXME: build initial membership config leader, term, cluster_version, @@ -159,6 +164,55 @@ impl ClusterStateReady { .collect() } + /// Execute an operation on each follower, until a quorum is reached. 
+ /// + /// Parameters: + /// - f: Operation to execute on each follower's connection + /// - filter: Function to filter on each response + /// - quorum: Function to determine if a quorum is reached, use functions in `QuorumSet` trait + /// + /// Returns `true` if then given quorum is reached. + pub(crate) async fn for_each_follower_with_quorum, F, Q>( + self, + mut f: impl FnMut(Arc) -> Fut, + mut filter: F, + mut expect_quorum: Q, + ) -> bool + where + F: FnMut(R) -> bool, + Q: FnMut(&dyn QuorumSet>, Vec) -> bool, + { + let qs = self.membership.as_joint(); + let leader_id = self.leader_id(); + + #[allow(clippy::pattern_type_mismatch)] + let stream: FuturesUnordered<_> = self + .member_connects() + .filter(|(id, _)| *id != leader_id) + .map(|(id, conn)| f(Arc::clone(conn)).map(move |r| (id, r))) + .collect(); + + let mut filtered = + stream.filter_map(|(id, r)| futures::future::ready(filter(r).then_some(id))); + + let mut ids = vec![leader_id]; + while let Some(id) = filtered.next().await { + ids.push(id); + if expect_quorum(&qs, ids.clone()) { + return true; + } + } + + false + } + + /// Gets member connects + fn member_connects(&self) -> impl Iterator)> { + self.membership + .members() + .filter_map(|(id, _)| self.connects.get(&id).map(|c| (id, c))) + } + /// Returns the quorum size based on the given quorum function /// /// NOTE: Do not update the cluster in between an `for_each_xxx` and an `get_quorum`, which may diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs index e6d585f6e..a9149e2dc 100644 --- a/crates/curp/src/client/unary/propose_impl.rs +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -1,13 +1,12 @@ -use std::pin::Pin; +use std::{pin::Pin, sync::Arc}; use curp_external_api::cmd::Command; use futures::{future, stream, FutureExt, Stream, StreamExt}; use crate::{ client::{connect::ProposeResponse, retry::Context}, - quorum, - rpc::{CurpError, OpResponse, ProposeRequest, RecordRequest, 
ResponseOp}, - super_quorum, + quorum::QuorumSet, + rpc::{connect::ConnectApi, CurpError, OpResponse, ProposeRequest, RecordRequest, ResponseOp}, }; use super::Unary; @@ -164,17 +163,16 @@ impl Unary { /// Returns `true` if the read index is successful async fn send_read_index(&self, ctx: &Context) -> bool { let term = ctx.cluster_state().term(); - let quorum = ctx.cluster_state().get_quorum(quorum); - let expect = quorum.wrapping_sub(1); let timeout = self.config.propose_timeout(); + let read_index = + move |conn: Arc| async move { conn.read_index(timeout).await }; ctx.cluster_state() - .for_each_follower(|conn| async move { conn.read_index(timeout).await }) - .filter_map(|res| future::ready(res.ok())) - .filter(|resp| future::ready(resp.get_ref().term == term)) - .take(expect) - .count() - .map(|c| c >= expect) + .for_each_follower_with_quorum( + read_index, + move |res| res.is_ok_and(|resp| resp.get_ref().term == term), + |qs, ids| QuorumSet::is_quorum(qs, ids), + ) .await } @@ -182,23 +180,21 @@ impl Unary { /// /// Returns a stream that yield a single event fn send_record(&self, cmd: &C, ctx: &Context) -> EventStream<'_, C> { - let superquorum = ctx.cluster_state().get_quorum(super_quorum); let timeout = self.config.propose_timeout(); let record_req = RecordRequest::new::(ctx.propose_id(), cmd); - let expect = superquorum.wrapping_sub(1); + let record = move |conn: Arc| { + let record_req_c = record_req.clone(); + async move { conn.record(record_req_c, timeout).await } + }; + let stream = ctx .cluster_state() - .for_each_follower(|conn| { - let record_req_c = record_req.clone(); - async move { conn.record(record_req_c, timeout).await } - }) - .filter_map(|res| future::ready(res.ok())) - .filter(|resp| future::ready(!resp.get_ref().conflict)) - .take(expect) - .count() - .map(move |c| ProposeEvent::Record { - conflict: c < expect, - }) + .for_each_follower_with_quorum( + record, + |res| res.is_ok_and(|resp| !resp.get_ref().conflict), + |qs, ids| 
QuorumSet::is_super_quorum(qs, ids), + ) + .map(move |conflict| ProposeEvent::Record { conflict }) .map(Ok) .into_stream(); diff --git a/crates/curp/src/lib.rs b/crates/curp/src/lib.rs index 1250571c4..4be9e9b0c 100644 --- a/crates/curp/src/lib.rs +++ b/crates/curp/src/lib.rs @@ -212,15 +212,6 @@ mod member; /// Quorum definitions mod quorum; -/// Calculate the super quorum -#[inline] -#[must_use] -#[allow(clippy::arithmetic_side_effects)] // it's safe -fn super_quorum(size: usize) -> usize { - let fault_tolerance = size - quorum(size); - fault_tolerance + recover_quorum(size) -} - /// Calculate the quorum #[inline] #[must_use] @@ -259,7 +250,6 @@ mod test { for (node_cnt, expected) in nodes.into_iter().zip(expected_res.into_iter()) { assert_eq!(quorum(node_cnt), expected.0); assert_eq!(recover_quorum(node_cnt), expected.1); - assert_eq!(super_quorum(node_cnt), expected.2); } } } diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 8af37e06a..b7f8e6585 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -110,6 +110,16 @@ impl Membership { pub(crate) fn as_joint(&self) -> Joint, &[BTreeSet]> { Joint::new(self.members.as_slice()) } + + /// Gets the addresses of all members + pub(crate) fn members(&self) -> impl Iterator { + self.nodes.iter().filter_map(|(id, addr)| { + self.members + .iter() + .any(|m| m.contains(id)) + .then_some((*id, addr)) + }) + } } /// The change of membership diff --git a/crates/curp/src/quorum.rs b/crates/curp/src/quorum.rs index 20bd7d17a..d322fad73 100644 --- a/crates/curp/src/quorum.rs +++ b/crates/curp/src/quorum.rs @@ -40,7 +40,7 @@ pub(crate) trait QuorumSet { #[allow(clippy::arithmetic_side_effects)] impl QuorumSet for BTreeSet where - I: Iterator + Clone, + I: IntoIterator + Clone, { fn is_quorum(&self, ids: I) -> bool { let num = ids.into_iter().filter(|id| self.contains(id)).count(); @@ -60,7 +60,7 @@ where impl QuorumSet for Joint> where - I: Iterator + Clone, + I: IntoIterator + Clone, QS: 
QuorumSet, { fn is_quorum(&self, ids: I) -> bool { @@ -78,7 +78,7 @@ where impl QuorumSet for Joint where - I: Iterator + Clone, + I: IntoIterator + Clone, QS: QuorumSet, { fn is_quorum(&self, ids: I) -> bool { From c33ce2c2b7c952685bd987e64ea7f0faca65c0b9 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 2 Aug 2024 10:16:21 +0800 Subject: [PATCH 136/322] feat: implement fetch membership on the server --- crates/curp/proto/common | 2 +- crates/curp/src/rpc/mod.rs | 4 +++ crates/curp/src/server/curp_node/mod.rs | 43 +++++++++++++++++++++---- crates/curp/src/server/mod.rs | 12 +++++++ crates/curp/src/server/raw_curp/mod.rs | 6 ++++ crates/xline/src/server/auth_wrapper.rs | 18 ++++++++--- 6 files changed, 73 insertions(+), 12 deletions(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index f623076ee..81a9434fa 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit f623076eee58d90f284054861575f3e73d4b7c80 +Subproject commit 81a9434fa5d3974b09d23fe97c4be0e9c027d65d diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 095a30c11..0c84476f7 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -32,12 +32,15 @@ pub use self::proto::{ CmdResult, FetchClusterRequest, FetchClusterResponse, + FetchMembershipRequest, + FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, LeaseKeepAliveMsg, Member, MoveLeaderRequest, MoveLeaderResponse, + Node, OpResponse, OptionalU64, ProposeConfChangeRequest, @@ -47,6 +50,7 @@ pub use self::proto::{ ProposeResponse, PublishRequest, PublishResponse, + QuorumSet, ReadIndexRequest, ReadIndexResponse, RecordRequest, diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 504565920..81cb284d8 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -40,6 +40,7 @@ use super::{ use crate::{ 
cmd::{Command, CommandExecutor}, log_entry::{EntryData, LogEntry}, + member::Membership, members::{ClusterInfo, ServerId}, response::ResponseSender, role_change::RoleChange, @@ -47,12 +48,13 @@ use crate::{ self, connect::{InnerConnectApi, InnerConnectApiWrapper}, AppendEntriesRequest, AppendEntriesResponse, ConfChange, ConfChangeType, CurpError, - FetchClusterRequest, FetchClusterResponse, FetchReadStateRequest, FetchReadStateResponse, - InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, - MoveLeaderResponse, PoolEntry, ProposeConfChangeRequest, ProposeConfChangeResponse, - ProposeId, ProposeRequest, ProposeResponse, PublishRequest, PublishResponse, - ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, - SyncedResponse, TriggerShutdownRequest, TriggerShutdownResponse, TryBecomeLeaderNowRequest, + FetchClusterRequest, FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, + FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotRequest, + InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, Node, + PoolEntry, ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeId, ProposeRequest, + ProposeResponse, PublishRequest, PublishResponse, QuorumSet, ReadIndexResponse, + RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, SyncedResponse, + TriggerShutdownRequest, TriggerShutdownResponse, TryBecomeLeaderNowRequest, TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, }, server::{ @@ -406,6 +408,35 @@ impl, RC: RoleChange> CurpNode { } Err(CurpError::RpcTransport(())) } + + /// Handles fetch membership requests + pub(super) fn fetch_membership( + &self, + _req: FetchMembershipRequest, + ) -> Result { + let (leader_id, term, _) = self.curp.leader(); + let Membership { members, nodes } = self.curp.effective_membership(); + let members = members + .into_iter() + .map(|s| QuorumSet { + set: s.into_iter().collect(), + }) + .collect(); + 
let nodes = nodes + .into_iter() + .map(|(node_id, addr)| Node { node_id, addr }) + .collect(); + + let leader_id = + leader_id.ok_or(CurpError::LeaderTransfer("no current leader".to_owned()))?; + + Ok(FetchMembershipResponse { + members, + nodes, + term, + leader_id, + }) + } } /// Handlers for peers diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 0f1286527..c579a1c58 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -31,6 +31,8 @@ use crate::rpc::AppendEntriesRequest; use crate::rpc::AppendEntriesResponse; use crate::rpc::FetchClusterRequest; use crate::rpc::FetchClusterResponse; +use crate::rpc::FetchMembershipRequest; +use crate::rpc::FetchMembershipResponse; use crate::rpc::FetchReadStateRequest; use crate::rpc::FetchReadStateResponse; use crate::rpc::InstallSnapshotRequest; @@ -226,6 +228,16 @@ impl, RC: RoleChange> crate::rpc::Protocol fo )) } + #[instrument(skip_all, name = "curp_fetch_membership")] + async fn fetch_membership( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + Ok(tonic::Response::new( + self.inner.fetch_membership(request.into_inner())?, + )) + } + #[instrument(skip_all, name = "add_learner")] async fn add_learner( &self, diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index bbe10e44d..6dc156ae8 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -64,6 +64,7 @@ use super::DB; use crate::cmd::Command; use crate::log_entry::EntryData; use crate::log_entry::LogEntry; +use crate::member::Membership; use crate::member::MembershipState; use crate::members::ClusterInfo; use crate::members::ServerId; @@ -1294,6 +1295,11 @@ impl RawCurp { self.ctx.leader_tx.subscribe() } + /// Get the effective membership + pub(super) fn effective_membership(&self) -> Membership { + self.ms.read().effective().clone() + } + /// Get `append_entries` request for `follower_id` that contains the 
latest /// log entries pub(super) fn sync(&self, follower_id: ServerId) -> Option> { diff --git a/crates/xline/src/server/auth_wrapper.rs b/crates/xline/src/server/auth_wrapper.rs index 9f5c3a135..2cdb75287 100644 --- a/crates/xline/src/server/auth_wrapper.rs +++ b/crates/xline/src/server/auth_wrapper.rs @@ -4,11 +4,12 @@ use curp::{ cmd::PbCodec, rpc::{ AddLearnerRequest, AddLearnerResponse, FetchClusterRequest, FetchClusterResponse, - FetchReadStateRequest, FetchReadStateResponse, LeaseKeepAliveMsg, MoveLeaderRequest, - MoveLeaderResponse, OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, - ProposeRequest, Protocol, PublishRequest, PublishResponse, ReadIndexRequest, - ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, - RemoveLearnerResponse, ShutdownRequest, ShutdownResponse, + FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, + FetchReadStateResponse, LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, + OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeRequest, Protocol, + PublishRequest, PublishResponse, ReadIndexRequest, ReadIndexResponse, RecordRequest, + RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, ShutdownRequest, + ShutdownResponse, }, }; use flume::r#async::RecvStream; @@ -123,6 +124,13 @@ impl Protocol for AuthWrapper { self.curp_server.lease_keep_alive(request).await } + async fn fetch_membership( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + self.curp_server.fetch_membership(request).await + } + async fn add_learner( &self, request: tonic::Request, From 68d3072301243cbde7dc9f15cfb878e60fd0a5b4 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 10 Sep 2024 21:33:09 +0800 Subject: [PATCH 137/322] feat: implement fetch membership on client Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/keep_alive.rs | 17 +++++++++++---- 
crates/curp/src/rpc/connect/mod.rs | 32 ++++++++++++++++++++++++++-- crates/curp/src/rpc/reconnect.rs | 19 ++++++++++++----- 3 files changed, 57 insertions(+), 11 deletions(-) diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 7a636b1e8..467475bc1 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -155,10 +155,11 @@ mod tests { use crate::rpc::{ connect::{ConnectApi, MockConnectApi}, AddLearnerRequest, AddLearnerResponse, CurpError, FetchClusterRequest, - FetchClusterResponse, FetchReadStateRequest, FetchReadStateResponse, Member, - MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeConfChangeRequest, - ProposeConfChangeResponse, ProposeId, ProposeRequest, ProposeResponse, PublishRequest, - PublishResponse, ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, + FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, + FetchReadStateRequest, FetchReadStateResponse, Member, MoveLeaderRequest, + MoveLeaderResponse, OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, + ProposeId, ProposeRequest, ProposeResponse, PublishRequest, PublishResponse, + ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, ResponseOp, ShutdownRequest, ShutdownResponse, SyncedResponse, }; @@ -278,6 +279,14 @@ mod tests { (self.lease_keep_alive_handle)(client_id).await } + async fn fetch_membership( + &self, + _request: FetchMembershipRequest, + _timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + async fn add_learner( &self, _request: AddLearnerRequest, diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index 45eef5cf8..4cc5aeb2a 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -51,8 +51,8 @@ use crate::{ use super::{ proto::commandpb::{ReadIndexRequest, ReadIndexResponse}, 
reconnect::Reconnect, - AddLearnerRequest, AddLearnerResponse, OpResponse, RecordRequest, RecordResponse, - RemoveLearnerRequest, RemoveLearnerResponse, + AddLearnerRequest, AddLearnerResponse, FetchMembershipRequest, FetchMembershipResponse, + OpResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, }; /// Install snapshot chunk size: 64KB @@ -238,6 +238,13 @@ pub(crate) trait ConnectApi: Send + Sync + 'static { /// Keep send lease keep alive to server and mutate the client id async fn lease_keep_alive(&self, client_id: u64, interval: Duration) -> Result; + /// Fetches the membership + async fn fetch_membership( + &self, + request: FetchMembershipRequest, + timeout: Duration, + ) -> Result, CurpError>; + /// Add a learner to the cluster. async fn add_learner( &self, @@ -555,6 +562,16 @@ impl ConnectApi for Connect> { Ok(new_id) } + async fn fetch_membership( + &self, + request: FetchMembershipRequest, + timeout: Duration, + ) -> Result, CurpError> { + let mut client = self.rpc_connect.clone(); + let req = tonic::Request::new(request); + with_timeout!(timeout, client.fetch_membership(req)).map_err(Into::into) + } + async fn add_learner( &self, request: AddLearnerRequest, @@ -871,6 +888,17 @@ where Ok(new_id) } + async fn fetch_membership( + &self, + request: FetchMembershipRequest, + _timeout: Duration, + ) -> Result, CurpError> { + let mut req = tonic::Request::new(request); + req.metadata_mut().inject_bypassed(); + req.metadata_mut().inject_current(); + self.server.fetch_membership(req).await.map_err(Into::into) + } + async fn add_learner( &self, request: AddLearnerRequest, diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs index 22b6b4684..a51350aa6 100644 --- a/crates/curp/src/rpc/reconnect.rs +++ b/crates/curp/src/rpc/reconnect.rs @@ -8,11 +8,11 @@ use crate::{ members::ServerId, rpc::{ connect::ConnectApi, AddLearnerRequest, AddLearnerResponse, CurpError, FetchClusterRequest, - FetchClusterResponse, 
FetchReadStateRequest, FetchReadStateResponse, MoveLeaderRequest, - MoveLeaderResponse, OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, - ProposeRequest, PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, - RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, ShutdownRequest, - ShutdownResponse, + FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, + FetchReadStateRequest, FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, + OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeRequest, + PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, RecordResponse, + RemoveLearnerRequest, RemoveLearnerResponse, ShutdownRequest, ShutdownResponse, }, }; @@ -183,6 +183,15 @@ impl ConnectApi for Reconnect { } } + /// Fetches the membership + async fn fetch_membership( + &self, + request: FetchMembershipRequest, + timeout: Duration, + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::fetch_membership, request, timeout) + } + /// Add a learner to the cluster. 
async fn add_learner( &self, From f21c8f2603d234f52bdc17f95bac4c2ac583c088 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 10 Sep 2024 21:58:26 +0800 Subject: [PATCH 138/322] feat(curp/client): implement membership update on curp client Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 16 +++++ crates/curp/src/client/fetch.rs | 79 ++++++++++++++++++++++++- crates/curp/src/rpc/mod.rs | 16 ++++- 3 files changed, 109 insertions(+), 2 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 85d927d64..d17fc967f 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -124,6 +124,22 @@ impl ClusterStateReady { } } + /// Creates a new `ClusterState` + pub(crate) fn new_membership( + leader: ServerId, + term: u64, + connects: HashMap>, + membership: Membership, + ) -> Self { + Self { + membership, + leader, + term, + cluster_version: 0, + connects, + } + } + /// Take an async function and map to the dedicated server, return None /// if the server can not found in local state pub(crate) fn map_server>>( diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 2514afebf..4515d9370 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -9,7 +9,10 @@ use utils::parking_lot_lock::RwLockMap; use crate::{ quorum, - rpc::{self, connect::ConnectApi, CurpError, FetchClusterRequest, FetchClusterResponse}, + rpc::{ + self, connect::ConnectApi, CurpError, FetchClusterRequest, FetchClusterResponse, + FetchMembershipRequest, FetchMembershipResponse, + }, }; use super::cluster_state::{ClusterState, ClusterStateReady, ForEachServer}; @@ -39,6 +42,30 @@ where } } +/// Connect to cluster +/// +/// This is used to build a boxed closure that handles the `FetchClusterResponse` and returns +/// new connections. 
+pub(super) trait ConnectToClusterNew: + Fn(&FetchMembershipResponse) -> HashMap> + Send + Sync + 'static +{ + /// Clone the value + fn clone_box(&self) -> Box; +} + +impl ConnectToClusterNew for T +where + T: Fn(&FetchMembershipResponse) -> HashMap> + + Clone + + Send + + Sync + + 'static, +{ + fn clone_box(&self) -> Box { + Box::new(self.clone()) + } +} + /// Fetch cluster implementation pub(crate) struct Fetch { /// The fetch timeout @@ -65,6 +92,12 @@ impl Fetch { } } + #[allow(clippy::unimplemented)] // FIXME: implement this + /// Creates a new `Fetch` + pub(crate) fn new_membership(timeout: Duration, connect_to: C) -> Self { + unimplemented!() + } + #[cfg(test)] /// Creates a new `Fetch` fetch disabled pub(crate) fn new_disable() -> Self { @@ -100,6 +133,29 @@ impl Fetch { Err(CurpError::internal("cluster not available")) } + #[allow(clippy::diverging_sub_expression, clippy::todo)] // FIXME: implement + /// Fetch cluster and updates the current state + pub(crate) async fn fetch_membership( + &self, + state: impl ForEachServer, + ) -> Result<(ClusterStateReady, FetchMembershipResponse), CurpError> { + let resp = self + .pre_fetch_membership(&state) + .await + .ok_or(CurpError::internal("cluster not available"))?; + let new_state = ClusterStateReady::new_membership( + resp.leader_id, + resp.term, + todo!("call connect"), + resp.into_membership(), + ); + if self.fetch_term(&new_state).await { + return Ok((new_state, resp)); + } + + Err(CurpError::internal("cluster not available")) + } + /// Fetch the term of the cluster. This ensures that the current leader is the latest. async fn fetch_term(&self, state: &ClusterStateReady) -> bool { let timeout = self.timeout; @@ -138,6 +194,27 @@ impl Fetch { .filter(|resp| !resp.members.is_empty()) .max_by(|x, y| x.term.cmp(&y.term)) } + + /// Prefetch, send fetch cluster request to the cluster and get the + /// config with the greatest quorum. 
+ async fn pre_fetch_membership( + &self, + state: &impl ForEachServer, + ) -> Option { + let timeout = self.timeout; + let requests = state.for_each_server(|c| async move { + c.fetch_membership(FetchMembershipRequest {}, timeout).await + }); + let responses: Vec<_> = requests + .filter_map(|r| future::ready(r.ok())) + .map(Response::into_inner) + .collect() + .await; + responses + .into_iter() + .filter(|resp| !resp.members.is_empty()) + .max_by(|x, y| x.term.cmp(&y.term)) + } } impl std::fmt::Debug for Fetch { diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 0c84476f7..27c8ea717 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -65,7 +65,7 @@ pub use self::proto::{ }, inner_messagepb::inner_protocol_server::InnerProtocolServer, }; -use crate::{cmd::Command, log_entry::LogEntry, members::ServerId, LogIndex}; +use crate::{cmd::Command, log_entry::LogEntry, member::Membership, members::ServerId, LogIndex}; /// Metrics #[cfg(feature = "client-metrics")] @@ -951,3 +951,17 @@ impl std::fmt::Display for ProposeId { write!(f, "{}#{}", self.0, self.1) } } + +impl FetchMembershipResponse { + /// Consumes self and returns a `Membership` + pub(crate) fn into_membership(self) -> Membership { + let Self { members, nodes, .. 
} = self; + Membership { + members: members + .into_iter() + .map(|m| m.set.into_iter().collect()) + .collect(), + nodes: nodes.into_iter().map(|n| (n.node_id, n.addr)).collect(), + } + } +} From 18eafbdf8d7d2d0f3386a83f97014a62282a7433 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 5 Aug 2024 18:30:41 +0800 Subject: [PATCH 139/322] refactor(raw_curp): implement appending membership entries on follower feat: implement membership role state update --- crates/curp/src/member.rs | 5 +++ crates/curp/src/server/raw_curp/log.rs | 15 +++++-- .../curp/src/server/raw_curp/member_impl.rs | 43 +++++++++++++++++++ crates/curp/src/server/raw_curp/mod.rs | 14 +++++- 4 files changed, 72 insertions(+), 5 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index b7f8e6585..c0b6f6c02 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -120,6 +120,11 @@ impl Membership { .then_some((*id, addr)) }) } + + /// Returns `true` if the membership contains the given node id + pub(crate) fn contains(&self, node_id: u64) -> bool { + self.nodes.contains_key(&node_id) + } } /// The change of membership diff --git a/crates/curp/src/server/raw_curp/log.rs b/crates/curp/src/server/raw_curp/log.rs index 5d25e3f3b..81062df3e 100644 --- a/crates/curp/src/server/raw_curp/log.rs +++ b/crates/curp/src/server/raw_curp/log.rs @@ -315,8 +315,13 @@ type ConfChangeEntries = Vec>>; /// Fallback indexes type type FallbackIndexes = HashSet; -/// Type returned when append success -type AppendSuccess = (Vec>>, ConfChangeEntries, FallbackIndexes); +/// Type retruned when append success +type AppendSuccess = ( + Vec>>, + ConfChangeEntries, + FallbackIndexes, + LogIndex, +); impl Log { /// Create a new log @@ -408,6 +413,10 @@ impl Log { } // Truncate entries self.truncate(pi); + let truncate_at = self + .entries + .back() + .map_or_else(LogIndex::default, |e| e.inner.index); // Push the remaining entries and record the conf 
change entries for entry in entries .into_iter() @@ -426,7 +435,7 @@ impl Log { to_persist.push(entry); } - Ok((to_persist, conf_changes, need_fallback_indexes)) + Ok((to_persist, conf_changes, need_fallback_indexes, truncate_at)) } /// Check if the candidate's log is up-to-date diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 321416e5c..fa7e013ba 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -1,12 +1,17 @@ use curp_external_api::cmd::Command; use curp_external_api::role_change::RoleChange; +use curp_external_api::LogIndex; use rand::Rng; +use crate::log_entry::EntryData; +use crate::log_entry::LogEntry; use crate::member::Change; use crate::member::Membership; +use crate::member::MembershipState; use crate::rpc::ProposeId; use super::RawCurp; +use super::Role; impl RawCurp { /// Adds a learner to the membership state @@ -44,6 +49,44 @@ impl RawCurp { let mut ms_w = self.ms.write(); ms_w.update_commit(config); } + + /// Append membership entries + pub(crate) fn append_membership( + &self, + entries: &[LogEntry], + truncate_at: LogIndex, + commit_index: LogIndex, + ) { + let mut ms_w = self.ms.write(); + ms_w.truncate(truncate_at); + let configs = entries.iter().filter_map(|entry| { + if let EntryData::Member(ref m) = entry.entry_data { + Some((entry.index, m.clone())) + } else { + None + } + }); + for (index, config) in configs { + ms_w.append(index, config); + ms_w.commit(commit_index.min(index)); + } + + self.update_role(&ms_w); + } + + /// Updates the role of the node based on the current membership state + fn update_role(&self, current: &MembershipState) { + // FIXME: implement node id + let id = 0; + let mut st_w = self.st.write(); + if current.effective().contains(id) { + if matches!(st_w.role, Role::Learner) { + st_w.role = Role::Follower; + } + } else { + st_w.role = Role::Learner; + } + } } /// Wrapper for the return value 
of the raw curp methods diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 6dc156ae8..357cf2ed1 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -316,6 +316,11 @@ enum Role { Candidate, /// Leader Leader, + /// Learner + /// + /// A learner is a follower that only receives append entries or install + /// snapshots from the leader, it cannot vote or become a candidate. + Learner, } /// Relevant context for Curp @@ -476,6 +481,7 @@ impl RawCurp { let timeout = match st_r.role { Role::Follower | Role::Leader => st_r.follower_timeout_ticks, Role::PreCandidate | Role::Candidate => st_r.candidate_timeout_ticks, + Role::Learner => return None, }; let tick = self.ctx.election_tick.fetch_add(1, Ordering::AcqRel); if tick < timeout { @@ -493,6 +499,7 @@ impl RawCurp { self.lst.reset_transferee(); None } + Role::Learner => None, } } } @@ -771,6 +778,7 @@ impl RawCurp { /// Handle `append_entries` /// Return `Ok(term, entries)` if succeeds /// Return `Err(term, hint_index)` if fails + #[allow(clippy::needless_pass_by_value)] // TODO: avoid cloning of `entries` pub(super) fn handle_append_entries( &self, term: u64, @@ -816,9 +824,10 @@ impl RawCurp { // append log entries let mut log_w = self.log.write(); - let (to_persist, cc_entries, fallback_indexes) = log_w - .try_append_entries(entries, prev_log_index, prev_log_term) + let (to_persist, cc_entries, fallback_indexes, truncate_at) = log_w + .try_append_entries(entries.clone(), prev_log_index, prev_log_term) .map_err(|_ig| (term, log_w.commit_index + 1))?; + self.append_membership(&entries, truncate_at, leader_commit); // fallback overwritten conf change entries for idx in fallback_indexes.iter().sorted().rev() { let info = log_w.fallback_contexts.remove(idx).unwrap_or_else(|| { @@ -849,6 +858,7 @@ impl RawCurp { if prev_commit_index < log_w.commit_index { self.apply(&mut *log_w); } + Ok((term, to_persist)) } From 
baaafc8a3d4c1d4a6537c961a7bb78cbca463053 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 8 Aug 2024 10:51:07 +0800 Subject: [PATCH 140/322] refactor(raw_curp): implement append membership entries on leader --- crates/curp/src/server/cmd_worker/mod.rs | 2 +- .../curp/src/server/raw_curp/member_impl.rs | 25 +++++++++++-------- crates/curp/src/server/raw_curp/mod.rs | 2 ++ 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/crates/curp/src/server/cmd_worker/mod.rs b/crates/curp/src/server/cmd_worker/mod.rs index c2fbfe714..2b0f43965 100644 --- a/crates/curp/src/server/cmd_worker/mod.rs +++ b/crates/curp/src/server/cmd_worker/mod.rs @@ -224,7 +224,7 @@ async fn after_sync_others, RC: RoleChange>( } // The no-op command has been applied to state machine (EntryData::Empty, _) => curp.set_no_op_applied(), - (EntryData::Member(config), _) => curp.commit_membership(config.clone()), + (EntryData::Member(_), _) => {} _ => unreachable!(), } diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index fa7e013ba..fad4cf141 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -6,7 +6,6 @@ use rand::Rng; use crate::log_entry::EntryData; use crate::log_entry::LogEntry; use crate::member::Change; -use crate::member::Membership; use crate::member::MembershipState; use crate::rpc::ProposeId; @@ -44,22 +43,20 @@ impl RawCurp { Some(ReturnValueWrapper::new((), propose_id)) } - /// Updates the committed membership - pub(crate) fn commit_membership(&self, config: Membership) { - let mut ms_w = self.ms.write(); - ms_w.update_commit(config); - } - /// Append membership entries - pub(crate) fn append_membership( + pub(crate) fn append_membership( &self, - entries: &[LogEntry], + entries: I, truncate_at: LogIndex, commit_index: LogIndex, - ) { + ) where + E: AsRef>, + I: IntoIterator, + { let mut ms_w = self.ms.write(); 
ms_w.truncate(truncate_at); - let configs = entries.iter().filter_map(|entry| { + let configs = entries.into_iter().filter_map(|entry| { + let entry = entry.as_ref(); if let EntryData::Member(ref m) = entry.entry_data { Some((entry.index, m.clone())) } else { @@ -74,6 +71,12 @@ impl RawCurp { self.update_role(&ms_w); } + /// Updates the commit index + pub(crate) fn membership_commit_to(&self, index: LogIndex) { + let mut ms_w = self.ms.write(); + ms_w.commit(index); + } + /// Updates the role of the node based on the current membership state fn update_role(&self, current: &MembershipState) { // FIXME: implement node id diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 357cf2ed1..36db19d93 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -592,6 +592,7 @@ impl RawCurp { ); } }); + self.append_membership(&log_entries, u64::MAX, 0); self.entry_process_multi(&mut log_w, &to_process, term); let log_r = RwLockWriteGuard::downgrade(log_w); @@ -907,6 +908,7 @@ impl RawCurp { let mut log_w = RwLockUpgradableReadGuard::upgrade(log_r); if last_sent_index > log_w.commit_index { log_w.commit_to(last_sent_index); + self.membership_commit_to(last_sent_index); debug!("{} updates commit index to {last_sent_index}", self.id()); self.apply(&mut *log_w); } From 3e2c740a6f5465775fbae86efec2cb96dfd22df1 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 8 Aug 2024 15:38:02 +0800 Subject: [PATCH 141/322] refactor: add a `in_flight` method for `MembershipState` --- crates/curp/src/member.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index c0b6f6c02..caef8205d 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -62,6 +62,11 @@ impl MembershipState { pub(crate) fn effective(&self) -> &Membership { &self.effective } + + /// Returns the Some(membership) if there is NO 
membership change in flight + pub(crate) fn in_flight(&self) -> Option<&Membership> { + (self.effective != self.committed).then_some(&self.committed) + } } /// Membership config From 4496a5b34b0dea5aa903f6d440ed7ec9ab29c943 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 8 Aug 2024 17:35:01 +0800 Subject: [PATCH 142/322] refactor: implement member add and member remove for `Membership` --- crates/curp/src/member.rs | 51 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index caef8205d..9d400dde3 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -105,12 +105,56 @@ impl Membership { let _ignore = nodes.remove(&id)?; } + Some(Self { members, nodes }) + } + Change::AddMember(ids) => { + let mut members = self.members.clone(); + let nodes = self.nodes.clone(); + if self.validate_ids(&ids) { + return None; + } + let mut set = Self::choose_set(&members).clone(); + for id in ids { + let _ignore = set.insert(id); + } + members.push(set); + + Some(Self { members, nodes }) + } + Change::RemoveMember(ids) => { + let mut members = self.members.clone(); + let nodes = self.nodes.clone(); + if self.validate_ids(&ids) { + return None; + } + let mut set = Self::choose_set(&members).clone(); + for id in ids { + let _ignore = set.remove(&id); + } + members.push(set); + Some(Self { members, nodes }) } } } - #[allow(unused)] + /// Choose a quorum set + /// + /// TODO: select the config where the leader is in + fn choose_set(members: &[BTreeSet]) -> &BTreeSet { + members + .last() + .unwrap_or_else(|| unreachable!("there should be at least one member set")) + } + + /// Validates the given ids for member operations + fn validate_ids(&self, ids: &[u64]) -> bool { + // Ids should not be in any member set + ids.iter().all(|id| self.members.iter().all(|s| !s.contains(id))) + // Ids should be in nodes + && ids.iter().all(|id| 
self.nodes.contains_key(id)) + } + /// Converts to `Joint` pub(crate) fn as_joint(&self) -> Joint, &[BTreeSet]> { Joint::new(self.members.as_slice()) @@ -132,10 +176,15 @@ impl Membership { } } +#[allow(unused)] /// The change of membership pub(crate) enum Change { /// Adds learners AddLearner(Vec<(u64, String)>), /// Removes learners RemoveLearner(Vec), + /// Adds members + AddMember(Vec), + /// Removes members + RemoveMember(Vec), } From b2728641679eeb97bb2b4ba6c31779d3a6d0460c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 9 Aug 2024 09:07:47 +0800 Subject: [PATCH 143/322] feat: implement member add and remove in raw curp --- crates/curp/proto/common | 2 +- crates/curp/src/member.rs | 106 +++++++++++------- crates/curp/src/quorum.rs | 24 ++++ crates/curp/src/rpc/mod.rs | 4 + .../curp/src/server/curp_node/member_impl.rs | 42 +++++++ crates/curp/src/server/mod.rs | 24 ++++ .../curp/src/server/raw_curp/member_impl.rs | 46 ++++---- crates/curp/src/server/raw_curp/mod.rs | 1 - 8 files changed, 181 insertions(+), 68 deletions(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 81a9434fa..56c792d19 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 81a9434fa5d3974b09d23fe97c4be0e9c027d65d +Subproject commit 56c792d19852b7ec1943cac48487e7eb0b5d8ef4 diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 9d400dde3..93839f56a 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -1,7 +1,9 @@ use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::collections::BTreeSet; +use std::collections::HashSet; use std::hash::Hash; +use std::iter; use curp_external_api::LogIndex; use serde::Deserialize; @@ -82,77 +84,98 @@ impl Membership { /// Generates a new membership from `Change` /// /// Returns `None` if the change is invalid - pub(crate) fn change(&self, change: Change) -> Option { + pub(crate) fn 
change(&self, change: Change) -> Vec { match change { Change::AddLearner(learners) => { let members = self.members.clone(); let mut nodes = self.nodes.clone(); for (id, addr) in learners { match nodes.entry(id) { - Entry::Occupied(_) => return None, + Entry::Occupied(_) => return vec![], Entry::Vacant(e) => { let _ignore = e.insert(addr); } } } - Some(Self { members, nodes }) + vec![Self { members, nodes }] } Change::RemoveLearner(ids) => { let members = self.members.clone(); let mut nodes = self.nodes.clone(); for id in ids { - let _ignore = nodes.remove(&id)?; + if nodes.remove(&id).is_none() { + return vec![]; + } } - Some(Self { members, nodes }) + vec![Self { members, nodes }] } - Change::AddMember(ids) => { - let mut members = self.members.clone(); - let nodes = self.nodes.clone(); - if self.validate_ids(&ids) { - return None; - } - let mut set = Self::choose_set(&members).clone(); - for id in ids { - let _ignore = set.insert(id); - } - members.push(set); + Change::AddMember(ids) => self.update_members(ids, |i, set| { + set.union(&i.into_iter().collect()).copied().collect() + }), + Change::RemoveMember(ids) => self.update_members(ids, |i, set| { + set.difference(&i.into_iter().collect()).copied().collect() + }), + } + } - Some(Self { members, nodes }) - } - Change::RemoveMember(ids) => { - let mut members = self.members.clone(); - let nodes = self.nodes.clone(); - if self.validate_ids(&ids) { - return None; - } - let mut set = Self::choose_set(&members).clone(); - for id in ids { - let _ignore = set.remove(&id); - } - members.push(set); + /// Updates the membership based on the given operation and returns + /// a vector of coherent memberships. 
+ fn update_members(&self, ids: Vec, op: F) -> Vec + where + F: FnOnce(Vec, BTreeSet) -> BTreeSet, + { + if !self.exists(&ids) { + return vec![]; + } + let last = self.last_set(); + let target = op(ids, last); + self.all_coherent(&target) + } - Some(Self { members, nodes }) - } + /// Generates all coherent membership to reach the target + fn all_coherent(&self, target: &BTreeSet) -> Vec { + iter::successors(Some(self.clone()), |current| { + let next = Self::next_coherent(current, target.clone()); + (current != &next).then_some(next) + }) + .collect() + } + + /// Generates a new coherent membership from a quorum set + fn next_coherent(ms: &Self, set: BTreeSet) -> Self { + let next = ms.as_joint_owned().coherent(set).into_inner(); + let original_ids = ms + .members + .iter() + .flat_map(BTreeSet::iter) + .collect::>(); + let next_ids = next.iter().flat_map(BTreeSet::iter).collect::>(); + let mut nodes = ms.nodes.clone(); + for id in original_ids.difference(&next_ids) { + let _ignore = nodes.remove(id); + } + + Self { + members: next, + nodes, } } - /// Choose a quorum set + /// Returns the last member set /// - /// TODO: select the config where the leader is in - fn choose_set(members: &[BTreeSet]) -> &BTreeSet { - members + fn last_set(&self) -> BTreeSet { + self.members .last() .unwrap_or_else(|| unreachable!("there should be at least one member set")) + .clone() } /// Validates the given ids for member operations - fn validate_ids(&self, ids: &[u64]) -> bool { - // Ids should not be in any member set - ids.iter().all(|id| self.members.iter().all(|s| !s.contains(id))) + fn exists(&self, ids: &[u64]) -> bool { // Ids should be in nodes - && ids.iter().all(|id| self.nodes.contains_key(id)) + ids.iter().all(|id| self.nodes.contains_key(id)) } /// Converts to `Joint` @@ -160,6 +183,11 @@ impl Membership { Joint::new(self.members.as_slice()) } + /// Converts to `Joint` + pub(crate) fn as_joint_owned(&self) -> Joint, Vec>> { + Joint::new(self.members.clone()) + } + /// 
Gets the addresses of all members pub(crate) fn members(&self) -> impl Iterator { self.nodes.iter().filter_map(|(id, addr)| { diff --git a/crates/curp/src/quorum.rs b/crates/curp/src/quorum.rs index d322fad73..01c9f8b6b 100644 --- a/crates/curp/src/quorum.rs +++ b/crates/curp/src/quorum.rs @@ -17,6 +17,30 @@ impl Joint { _qs_type: PhantomData, } } + + /// Unwrap the inner quorum set + pub(crate) fn into_inner(self) -> I { + self.sets + } +} + +impl Joint> +where + QS: PartialEq + Clone, +{ + /// Generates a new coherent joint quorum set + pub(crate) fn coherent(&self, qs: QS) -> Self { + if self.sets.iter().any(|s| *s == qs) { + Self::new(vec![qs]) + } else { + // TODO: select the config where the leader is in + let last = self + .sets + .last() + .unwrap_or_else(|| unreachable!("there should be at least one quorum set")); + Self::new(vec![last.clone(), qs]) + } + } } /// A quorum set diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 27c8ea717..6639ccc50 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -29,6 +29,8 @@ pub use self::proto::{ protocol_server::{Protocol, ProtocolServer}, AddLearnerRequest, AddLearnerResponse, + AddMemberRequest, + AddMemberResponse, CmdResult, FetchClusterRequest, FetchClusterResponse, @@ -57,6 +59,8 @@ pub use self::proto::{ RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, + RemoveMemberRequest, + RemoveMemberResponse, ShutdownRequest, ShutdownResponse, SyncedResponse, diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index babc716f7..9b8dffc8a 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -8,11 +8,16 @@ use curp_external_api::cmd::Command; use curp_external_api::cmd::CommandExecutor; use curp_external_api::role_change::RoleChange; +use crate::member::Change; use crate::rpc::AddLearnerRequest; use crate::rpc::AddLearnerResponse; +use 
crate::rpc::AddMemberRequest; +use crate::rpc::AddMemberResponse; use crate::rpc::CurpError; use crate::rpc::RemoveLearnerRequest; use crate::rpc::RemoveLearnerResponse; +use crate::rpc::RemoveMemberRequest; +use crate::rpc::RemoveMemberResponse; use super::CurpNode; @@ -45,4 +50,41 @@ impl, RC: RoleChange> CurpNode { Ok(RemoveLearnerResponse {}) } + + /// Promotes a learner to a member + pub(crate) async fn add_member( + &self, + request: AddMemberRequest, + ) -> Result { + self.update_and_wait(Change::AddMember(request.node_ids)) + .await?; + + Ok(AddMemberResponse {}) + } + + /// Demotes a member to a learner + pub(crate) async fn remove_member( + &self, + request: RemoveMemberRequest, + ) -> Result { + self.update_and_wait(Change::RemoveMember(request.node_ids)) + .await?; + + Ok(RemoveMemberResponse {}) + } + + /// Updates the membership based on the given change and waits for + /// the proposal to be committed + async fn update_and_wait(&self, change: Change) -> Result<(), CurpError> { + let configs = self.curp.generate_membership(change); + if configs.is_empty() { + return Err(CurpError::invalid_member_change()); + } + for config in configs { + let propose_id = self.curp.update_membership(config); + self.curp.wait_propose_ids(Some(propose_id)).await; + } + + Ok(()) + } } diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index c579a1c58..0ecfa0ec7 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -261,6 +261,30 @@ impl, RC: RoleChange> crate::rpc::Protocol fo .map(tonic::Response::new) .map_err(Into::into) } + + #[instrument(skip_all, name = "add_member")] + async fn add_member( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + self.inner + .add_member(request.into_inner()) + .await + .map(tonic::Response::new) + .map_err(Into::into) + } + + #[instrument(skip_all, name = "remove_member")] + async fn remove_member( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + 
self.inner + .remove_member(request.into_inner()) + .await + .map(tonic::Response::new) + .map_err(Into::into) + } } #[tonic::async_trait] diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index fad4cf141..4582547c6 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -6,6 +6,7 @@ use rand::Rng; use crate::log_entry::EntryData; use crate::log_entry::LogEntry; use crate::member::Change; +use crate::member::Membership; use crate::member::MembershipState; use crate::rpc::ProposeId; @@ -43,6 +44,24 @@ impl RawCurp { Some(ReturnValueWrapper::new((), propose_id)) } + /// Generate memberships based on the provided change + pub(crate) fn generate_membership(&self, change: Change) -> Vec { + let ms_r = self.ms.read(); + ms_r.committed().change(change) + } + + /// Updates the membership config + pub(crate) fn update_membership(&self, config: Membership) -> ProposeId { + // FIXME: define the lock order of log and ms + let mut log_w = self.log.write(); + let mut ms_w = self.ms.write(); + ms_w.update_effective(config.clone()); + let st_r = self.st.read(); + let propose_id = ProposeId(rand::random(), 0); + let _entry = log_w.push(st_r.term, propose_id, config); + propose_id + } + /// Append membership entries pub(crate) fn append_membership( &self, @@ -92,33 +111,6 @@ impl RawCurp { } } -/// Wrapper for the return value of the raw curp methods -/// -/// It wraps the actual return value and the propose id of the request -pub(crate) struct ReturnValueWrapper { - /// The actual return value - value: T, - /// The propose id of the request - propose_id: ProposeId, -} - -impl ReturnValueWrapper { - /// Creates a new return value wrapper - pub(crate) fn new(value: T, propose_id: ProposeId) -> Self { - Self { value, propose_id } - } - - /// Returns the propose id of the request - pub(crate) fn propose_id(&self) -> ProposeId { - self.propose_id - } - - /// Unwraps the 
return value - pub(crate) fn into_inner(self) -> T { - self.value - } -} - /// Generate random ids of the given length fn random_ids(n: usize) -> Vec { let mut rng = rand::thread_rng(); diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 36db19d93..d9aa3e3c8 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -592,7 +592,6 @@ impl RawCurp { ); } }); - self.append_membership(&log_entries, u64::MAX, 0); self.entry_process_multi(&mut log_w, &to_process, term); let log_r = RwLockWriteGuard::downgrade(log_w); From 613aa85be8f9c81a94a6cadc971ef1fdf5d10543 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 9 Aug 2024 17:17:38 +0800 Subject: [PATCH 144/322] refactor: rewrite learner add/remove --- .../curp/src/server/curp_node/member_impl.rs | 21 +++++----- .../curp/src/server/raw_curp/member_impl.rs | 40 +++---------------- 2 files changed, 15 insertions(+), 46 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 9b8dffc8a..8fa4a1bac 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -27,13 +27,14 @@ impl, RC: RoleChange> CurpNode { &self, request: AddLearnerRequest, ) -> Result { - let addrs = request.node_addrs; - let ret = self.curp.add_learner(&addrs); - self.curp.wait_propose_ids(Some(ret.propose_id())).await; + let node_addrs = request.node_addrs; + let node_ids = self.curp.new_node_ids(node_addrs.len()); + self.update_and_wait(Change::AddLearner( + node_ids.clone().into_iter().zip(node_addrs).collect(), + )) + .await?; - Ok(AddLearnerResponse { - node_ids: ret.into_inner(), - }) + Ok(AddLearnerResponse { node_ids }) } /// Removes a learner from the cluster @@ -41,12 +42,8 @@ impl, RC: RoleChange> CurpNode { &self, request: RemoveLearnerRequest, ) -> Result { - let node_ids = request.node_ids; 
- let ret = self - .curp - .remove_learner(node_ids) - .ok_or(CurpError::invalid_member_change())?; - self.curp.wait_propose_ids(Some(ret.propose_id())).await; + self.update_and_wait(Change::RemoveLearner(request.node_ids)) + .await?; Ok(RemoveLearnerResponse {}) } diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 4582547c6..708c4dfce 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -14,34 +14,12 @@ use super::RawCurp; use super::Role; impl RawCurp { - /// Adds a learner to the membership state - pub(crate) fn add_learner(&self, addrs: &[String]) -> ReturnValueWrapper> { - let mut ms_w = self.ms.write(); - let mut log_w = self.log.write(); - loop { - let ids = random_ids(addrs.len()); - let change = ids.clone().into_iter().zip(addrs.to_owned()).collect(); - let Some(config) = ms_w.committed().change(Change::AddLearner(change)) else { - continue; - }; - ms_w.update_effective(config.clone()); - let st_r = self.st.read(); - let propose_id = ProposeId(rand::random(), 0); - let _entry = log_w.push(st_r.term, propose_id, config); - return ReturnValueWrapper::new(ids, propose_id); - } - } - - /// Removes a learner from the membership state - pub(crate) fn remove_learner(&self, ids: Vec) -> Option> { - let mut ms_w = self.ms.write(); - let mut log_w = self.log.write(); - let config = ms_w.committed().change(Change::RemoveLearner(ids))?; - ms_w.update_effective(config.clone()); - let st_r = self.st.read(); - let propose_id = ProposeId(rand::random(), 0); - let _entry = log_w.push(st_r.term, propose_id, config); - Some(ReturnValueWrapper::new((), propose_id)) + /// Generates new node ids + /// TODO: makes sure that the ids are unique + #[allow(clippy::unused_self)] // it should be used after the previous TODO + pub(crate) fn new_node_ids(&self, n: usize) -> Vec { + let mut rng = rand::thread_rng(); + (0..n).map(|_| rng.gen()).collect() } /// 
Generate memberships based on the provided change @@ -110,9 +88,3 @@ impl RawCurp { } } } - -/// Generate random ids of the given length -fn random_ids(n: usize) -> Vec { - let mut rng = rand::thread_rng(); - (0..n).map(|_| rng.gen()).collect() -} From a34b186705c39c67ef750ab39e110fdf228ded46 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 9 Aug 2024 18:17:48 +0800 Subject: [PATCH 145/322] refactor: Add a new `NodeMembershipState` in RawCurp This state contains the node id of current node and the membership state of the cluster --- crates/curp/src/member.rs | 41 +++++++++++++++++++ .../curp/src/server/raw_curp/member_impl.rs | 21 +++++----- crates/curp/src/server/raw_curp/mod.rs | 8 ++-- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 93839f56a..801ddef82 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -11,6 +11,47 @@ use serde::Serialize; use crate::quorum::Joint; +/// The membership state of the node +pub(crate) struct NodeMembershipState { + /// The id of current node + // WARN: This id should be diff from the old `ServerID` + // TODO: use a distinct type for this + node_id: u64, + /// The membership state of the cluster + cluster_state: MembershipState, +} + +impl NodeMembershipState { + /// Creates a new `NodeMembershipState` + // FIXME: specify the node id and initial membership state in node config + pub(crate) fn new() -> Self { + Self { + node_id: 0, + cluster_state: MembershipState::default(), + } + } + + /// Returns the id of the current node + pub(crate) fn node_id(&self) -> u64 { + self.node_id + } + + /// Returns a reference of the membership state + pub(crate) fn cluster(&self) -> &MembershipState { + &self.cluster_state + } + + /// Returns a mutable reference of the membership state + pub(crate) fn cluster_mut(&mut self) -> &mut MembershipState { + &mut self.cluster_state + } + + /// Returns `true` if the 
current node is a member of the cluster + pub(crate) fn is_member(&self) -> bool { + self.cluster().effective().contains(self.node_id()) + } +} + /// Membership state stored in current node #[derive(Debug, Default)] pub(crate) struct MembershipState { diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 708c4dfce..ff45a7305 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -7,7 +7,7 @@ use crate::log_entry::EntryData; use crate::log_entry::LogEntry; use crate::member::Change; use crate::member::Membership; -use crate::member::MembershipState; +use crate::member::NodeMembershipState; use crate::rpc::ProposeId; use super::RawCurp; @@ -25,7 +25,7 @@ impl RawCurp { /// Generate memberships based on the provided change pub(crate) fn generate_membership(&self, change: Change) -> Vec { let ms_r = self.ms.read(); - ms_r.committed().change(change) + ms_r.cluster().committed().change(change) } /// Updates the membership config @@ -33,7 +33,7 @@ impl RawCurp { // FIXME: define the lock order of log and ms let mut log_w = self.log.write(); let mut ms_w = self.ms.write(); - ms_w.update_effective(config.clone()); + ms_w.cluster_mut().update_effective(config.clone()); let st_r = self.st.read(); let propose_id = ProposeId(rand::random(), 0); let _entry = log_w.push(st_r.term, propose_id, config); @@ -51,7 +51,8 @@ impl RawCurp { I: IntoIterator, { let mut ms_w = self.ms.write(); - ms_w.truncate(truncate_at); + let ms = ms_w.cluster_mut(); + ms.truncate(truncate_at); let configs = entries.into_iter().filter_map(|entry| { let entry = entry.as_ref(); if let EntryData::Member(ref m) = entry.entry_data { @@ -61,8 +62,8 @@ impl RawCurp { } }); for (index, config) in configs { - ms_w.append(index, config); - ms_w.commit(commit_index.min(index)); + ms.append(index, config); + ms.commit(commit_index.min(index)); } self.update_role(&ms_w); @@ -71,15 +72,13 @@ impl 
RawCurp { /// Updates the commit index pub(crate) fn membership_commit_to(&self, index: LogIndex) { let mut ms_w = self.ms.write(); - ms_w.commit(index); + ms_w.cluster_mut().commit(index); } /// Updates the role of the node based on the current membership state - fn update_role(&self, current: &MembershipState) { - // FIXME: implement node id - let id = 0; + fn update_role(&self, current: &NodeMembershipState) { let mut st_w = self.st.write(); - if current.effective().contains(id) { + if current.is_member() { if matches!(st_w.role, Role::Learner) { st_w.role = Role::Follower; } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index d9aa3e3c8..d5f0371f6 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -65,7 +65,7 @@ use crate::cmd::Command; use crate::log_entry::EntryData; use crate::log_entry::LogEntry; use crate::member::Membership; -use crate::member::MembershipState; +use crate::member::NodeMembershipState; use crate::members::ClusterInfo; use crate::members::ServerId; use crate::quorum; @@ -126,7 +126,7 @@ pub struct RawCurp { /// Task manager task_manager: Arc, /// Membership state - ms: RwLock, + ms: RwLock, } /// Tmp struct for building `RawCurp` @@ -220,7 +220,7 @@ impl RawCurpBuilder { log, ctx, task_manager: args.task_manager, - ms: RwLock::default(), + ms: RwLock::new(NodeMembershipState::new()), }; if args.is_leader { @@ -1308,7 +1308,7 @@ impl RawCurp { /// Get the effective membership pub(super) fn effective_membership(&self) -> Membership { - self.ms.read().effective().clone() + self.ms.read().cluster().effective().clone() } /// Get `append_entries` request for `follower_id` that contains the latest From c2839aff2e6817e5aa955f06df0597d3dab14e51 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 9 Aug 2024 18:20:12 +0800 Subject: [PATCH 146/322] chore: add member add/remove to auth wrapper --- 
crates/xline/src/server/auth_wrapper.rs | 27 +++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/crates/xline/src/server/auth_wrapper.rs b/crates/xline/src/server/auth_wrapper.rs index 2cdb75287..03647c8f7 100644 --- a/crates/xline/src/server/auth_wrapper.rs +++ b/crates/xline/src/server/auth_wrapper.rs @@ -3,12 +3,13 @@ use std::sync::Arc; use curp::{ cmd::PbCodec, rpc::{ - AddLearnerRequest, AddLearnerResponse, FetchClusterRequest, FetchClusterResponse, - FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, - FetchReadStateResponse, LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, - OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeRequest, Protocol, - PublishRequest, PublishResponse, ReadIndexRequest, ReadIndexResponse, RecordRequest, - RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, ShutdownRequest, + AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, + FetchClusterRequest, FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, + FetchReadStateRequest, FetchReadStateResponse, LeaseKeepAliveMsg, MoveLeaderRequest, + MoveLeaderResponse, OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, + ProposeRequest, Protocol, PublishRequest, PublishResponse, ReadIndexRequest, + ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, + RemoveLearnerResponse, RemoveMemberRequest, RemoveMemberResponse, ShutdownRequest, ShutdownResponse, }, }; @@ -144,4 +145,18 @@ impl Protocol for AuthWrapper { ) -> Result, tonic::Status> { self.curp_server.remove_learner(request).await } + + async fn add_member( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + self.curp_server.add_member(request).await + } + + async fn remove_member( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + self.curp_server.remove_member(request).await + } } From 76e9a7d0c2a9bbb7dc41f1c226a3bd6e4c896372 Mon Sep 
17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 12 Aug 2024 13:24:42 +0800 Subject: [PATCH 147/322] refactor: curp membership startup Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 801ddef82..41dc36a7a 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -23,7 +23,8 @@ pub(crate) struct NodeMembershipState { impl NodeMembershipState { /// Creates a new `NodeMembershipState` - // FIXME: specify the node id and initial membership state in node config + /// + /// This method is used to build learners pub(crate) fn new() -> Self { Self { node_id: 0, @@ -31,6 +32,25 @@ impl NodeMembershipState { } } + /// Creates a new `NodeMembershipState` with initial state + /// + /// This method is used to build the leader + pub(crate) fn new_init(node_id: u64, init_members: BTreeMap) -> Self { + let init_ms = Membership { + members: vec![init_members.keys().copied().collect()], + nodes: init_members, + }; + let cluster_state = MembershipState { + effective: init_ms, + index_effective: 1, + committed: Membership::default(), + }; + Self { + node_id: 0, + cluster_state, + } + } + /// Returns the id of the current node pub(crate) fn node_id(&self) -> u64 { self.node_id From 3d64afad1947b8f93beb8b0ae68eeab20090b2e7 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 12 Aug 2024 18:07:35 +0800 Subject: [PATCH 148/322] feat: add membership change config Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/lib.rs | 2 +- crates/curp/src/member.rs | 42 ++++++++++++++------- crates/curp/src/server/curp_node/mod.rs | 4 +- crates/curp/src/server/mod.rs | 5 +++ crates/curp/src/server/raw_curp/mod.rs | 5 ++- crates/curp/tests/it/common/curp_group.rs | 31 ++++++++++++++- 
crates/utils/src/config.rs | 46 +++++++++++++++++++++-- crates/utils/src/parser.rs | 28 +++++++++++++- crates/xline-test-utils/src/lib.rs | 27 ++++++++++++- crates/xline/src/server/xline_server.rs | 10 +++++ crates/xline/src/utils/args.rs | 17 ++++++++- 11 files changed, 191 insertions(+), 26 deletions(-) diff --git a/crates/curp/src/lib.rs b/crates/curp/src/lib.rs index 4be9e9b0c..58b57b4b0 100644 --- a/crates/curp/src/lib.rs +++ b/crates/curp/src/lib.rs @@ -207,7 +207,7 @@ mod snapshot; mod response; /// Membership state -mod member; +pub mod member; /// Quorum definitions mod quorum; diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 41dc36a7a..603c7b9a0 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -11,6 +11,28 @@ use serde::Serialize; use crate::quorum::Joint; +/// The membership info, used to build the initial states +#[derive(Debug, Clone)] +#[non_exhaustive] +pub struct MembershipInfo { + /// The id of current node + pub node_id: u64, + /// The initial cluster members + pub init_members: BTreeMap, +} + +impl MembershipInfo { + /// Creates a new `MembershipInfo` + #[inline] + #[must_use] + pub fn new(node_id: u64, init_members: BTreeMap) -> Self { + Self { + node_id, + init_members, + } + } +} + /// The membership state of the node pub(crate) struct NodeMembershipState { /// The id of current node @@ -22,20 +44,12 @@ pub(crate) struct NodeMembershipState { } impl NodeMembershipState { - /// Creates a new `NodeMembershipState` - /// - /// This method is used to build learners - pub(crate) fn new() -> Self { - Self { - node_id: 0, - cluster_state: MembershipState::default(), - } - } - /// Creates a new `NodeMembershipState` with initial state - /// - /// This method is used to build the leader - pub(crate) fn new_init(node_id: u64, init_members: BTreeMap) -> Self { + pub(crate) fn new(info: MembershipInfo) -> Self { + let MembershipInfo { + node_id, + init_members, + } = info; let init_ms = Membership { 
members: vec![init_members.keys().copied().collect()], nodes: init_members, @@ -46,7 +60,7 @@ impl NodeMembershipState { committed: Membership::default(), }; Self { - node_id: 0, + node_id, cluster_state, } } diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 81cb284d8..b7f9b4a18 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -40,7 +40,7 @@ use super::{ use crate::{ cmd::{Command, CommandExecutor}, log_entry::{EntryData, LogEntry}, - member::Membership, + member::{Membership, MembershipInfo}, members::{ClusterInfo, ServerId}, response::ResponseSender, role_change::RoleChange, @@ -873,6 +873,7 @@ impl, RC: RoleChange> CurpNode { #[allow(clippy::too_many_arguments)] // TODO: refactor this use builder pattern #[allow(clippy::needless_pass_by_value)] // The value should be consumed pub(super) fn new( + membership_info: MembershipInfo, cluster_info: Arc, is_leader: bool, cmd_executor: Arc, @@ -924,6 +925,7 @@ impl, RC: RoleChange> CurpNode { .as_tx(as_tx.clone()) .resp_txs(Arc::new(Mutex::default())) .id_barrier(Arc::new(IdBarrier::new())) + .membership_info(membership_info) .build_raw_curp() .map_err(|e| CurpError::internal(format!("build raw curp failed, {e}")))?, ); diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 0ecfa0ec7..89998aba0 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -20,6 +20,7 @@ use self::curp_node::CurpNode; pub use self::raw_curp::RawCurp; use crate::cmd::Command; use crate::cmd::CommandExecutor; +use crate::member::MembershipInfo; use crate::members::ClusterInfo; use crate::members::ServerId; use crate::response::ResponseSender; @@ -378,6 +379,7 @@ impl, RC: RoleChange> Rpc { #[inline] #[allow(clippy::too_many_arguments)] // TODO: refactor this use builder pattern pub fn new( + membership_info: MembershipInfo, cluster_info: Arc, is_leader: bool, executor: Arc, @@ -392,6 +394,7 
@@ impl, RC: RoleChange> Rpc { ) -> Self { #[allow(clippy::panic)] let curp_node = match CurpNode::new( + membership_info, cluster_info, is_leader, executor, @@ -426,6 +429,7 @@ impl, RC: RoleChange> Rpc { #[allow(clippy::too_many_arguments)] #[inline] pub async fn run_from_addr( + membership_info: MembershipInfo, cluster_info: Arc, is_leader: bool, addr: std::net::SocketAddr, @@ -449,6 +453,7 @@ impl, RC: RoleChange> Rpc { .get_shutdown_listener(TaskName::TonicServer) .unwrap_or_else(|| unreachable!("cluster should never shutdown before start")); let server = Self::new( + membership_info, cluster_info, is_leader, executor, diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index d5f0371f6..81cdc0d9f 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -65,6 +65,7 @@ use crate::cmd::Command; use crate::log_entry::EntryData; use crate::log_entry::LogEntry; use crate::member::Membership; +use crate::member::MembershipInfo; use crate::member::NodeMembershipState; use crate::members::ClusterInfo; use crate::members::ServerId; @@ -133,6 +134,8 @@ pub struct RawCurp { #[derive(Builder)] #[builder(name = "RawCurpBuilder")] pub(super) struct RawCurpArgs { + /// Membership information + membership_info: MembershipInfo, /// Cluster information cluster_info: Arc, /// Current node is leader or not @@ -220,7 +223,7 @@ impl RawCurpBuilder { log, ctx, task_manager: args.task_manager, - ms: RwLock::new(NodeMembershipState::new()), + ms: RwLock::new(NodeMembershipState::new(args.membership_info)), }; if args.is_leader { diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index 3afe7bd8d..a3b66b2d2 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -1,5 +1,11 @@ use std::{ - collections::HashMap, error::Error, fmt::Display, iter, path::PathBuf, sync::Arc, thread, + collections::{BTreeMap, 
HashMap}, + error::Error, + fmt::Display, + iter, + path::PathBuf, + sync::Arc, + thread, time::Duration, }; @@ -8,6 +14,7 @@ use clippy_utilities::NumericCast; use curp::{ client::{ClientApi, ClientBuilder}, error::ServerError, + member::MembershipInfo, members::{ClusterInfo, ServerId}, rpc::{InnerProtocolServer, Member, ProtocolServer}, server::{ @@ -112,7 +119,7 @@ impl CurpGroup { let mut nodes = HashMap::new(); let client_tls_config = None; let server_tls_config = None; - for (name, (config, xline_storage_config)) in configs.into_iter() { + for (node_id, (name, (config, xline_storage_config))) in configs.into_iter().enumerate() { let task_manager = Arc::new(TaskManager::new()); let snapshot_allocator = Self::get_snapshot_allocator_from_cfg(&config); let cluster_info = Arc::new(ClusterInfo::from_members_map( @@ -120,6 +127,13 @@ impl CurpGroup { [], &name, )); + let init_members = all_members_addrs + .values() + .map(|addrs| addrs[0].clone()) + .enumerate() + .map(|(id, addr)| (id as u64, addr)) + .collect(); + let membership_info = MembershipInfo::new(node_id as u64, init_members); let listener = listeners.remove(&name).unwrap(); let id = cluster_info.self_id(); let addr = cluster_info.self_peer_urls().pop().unwrap(); @@ -137,6 +151,7 @@ impl CurpGroup { let role_change_arc = role_change_cb.get_inner_arc(); let curp_storage = Arc::new(DB::open(&config.engine_cfg).unwrap()); let server = Arc::new(Rpc::new( + membership_info, cluster_info, name == leader_name, ce, @@ -265,7 +280,19 @@ impl CurpGroup { let role_change_cb = TestRoleChange::default(); let role_change_arc = role_change_cb.get_inner_arc(); let curp_storage = Arc::new(DB::open(&config.engine_cfg).unwrap()); + + // TODO: remove cluster info and build the membership info from start + let init_members: BTreeMap<_, _> = cluster_info + .all_members_peer_urls() + .values() + .map(|addrs| addrs[0].clone()) + .enumerate() + .map(|(id, addr)| (id as u64, addr)) + .collect(); + let node_id = init_members.len(); 
+ let membership_info = MembershipInfo::new(node_id as u64, init_members); let server = Arc::new(Rpc::new( + membership_info, cluster_info, false, ce, diff --git a/crates/utils/src/config.rs b/crates/utils/src/config.rs index 0f59dc853..def0994cd 100644 --- a/crates/utils/src/config.rs +++ b/crates/utils/src/config.rs @@ -1,4 +1,8 @@ -use std::{collections::HashMap, path::PathBuf, time::Duration}; +use std::{ + collections::{BTreeMap, HashMap}, + path::PathBuf, + time::Duration, +}; use derive_builder::Builder; use getset::Getters; @@ -119,6 +123,12 @@ pub struct ClusterConfig { #[getset(get = "pub")] #[serde(with = "state_format", default = "InitialClusterState::default")] initial_cluster_state: InitialClusterState, + /// Initial cluster members + #[getset(get = "pub")] + initial_membership_info: BTreeMap, + /// Node id + #[getset(get = "pub")] + node_id: u64, } impl Default for ClusterConfig { @@ -139,6 +149,8 @@ impl Default for ClusterConfig { client_config: ClientConfig::default(), server_timeout: ServerTimeout::default(), initial_cluster_state: InitialClusterState::default(), + initial_membership_info: BTreeMap::from([(0, "http://127.0.0.1:2379".to_owned())]), + node_id: 0, } } } @@ -189,6 +201,8 @@ impl ClusterConfig { client_config: ClientConfig, server_timeout: ServerTimeout, initial_cluster_state: InitialClusterState, + initial_membership_info: BTreeMap, + node_id: u64, ) -> Self { Self { name, @@ -202,6 +216,8 @@ impl ClusterConfig { client_config, server_timeout, initial_cluster_state, + initial_membership_info, + node_id, } } } @@ -1212,6 +1228,7 @@ mod tests { peer_advertise_urls = ['127.0.0.1:2380'] client_listen_urls = ['127.0.0.1:2379'] client_advertise_urls = ['127.0.0.1:2379'] + node_id = 1 [cluster.server_timeout] range_retry_timeout = '3s' @@ -1224,6 +1241,11 @@ mod tests { node2 = ['127.0.0.1:2380'] node3 = ['127.0.0.1:2381'] + [cluster.initial_membership_info] + 1 = '127.0.0.1:2379' + 2 = '127.0.0.1:2380' + 3 = '127.0.0.1:2381' + 
[cluster.curp_config] heartbeat_interval = '200ms' wait_synced_timeout = '100ms' @@ -1320,7 +1342,13 @@ mod tests { curp_config, client_config, server_timeout, - InitialClusterState::New + InitialClusterState::New, + BTreeMap::from([ + (1, "127.0.0.1:2379".to_owned()), + (2, "127.0.0.1:2380".to_owned()), + (3, "127.0.0.1:2381".to_owned()), + ]), + 1, ) ); @@ -1399,12 +1427,18 @@ mod tests { peer_advertise_urls = ['127.0.0.1:2380'] client_listen_urls = ['127.0.0.1:2379'] client_advertise_urls = ['127.0.0.1:2379'] + node_id = 1 [cluster.peers] node1 = ['127.0.0.1:2379'] node2 = ['127.0.0.1:2380'] node3 = ['127.0.0.1:2381'] + [cluster.initial_membership_info] + 1 = '127.0.0.1:2379' + 2 = '127.0.0.1:2380' + 3 = '127.0.0.1:2381' + [cluster.storage] [log] @@ -1445,7 +1479,13 @@ mod tests { CurpConfigBuilder::default().build().unwrap(), ClientConfig::default(), ServerTimeout::default(), - InitialClusterState::default() + InitialClusterState::default(), + BTreeMap::from([ + (1, "127.0.0.1:2379".to_owned()), + (2, "127.0.0.1:2380".to_owned()), + (3, "127.0.0.1:2381".to_owned()), + ]), + 1, ) ); diff --git a/crates/utils/src/parser.rs b/crates/utils/src/parser.rs index 75289a5f7..b05fbf268 100644 --- a/crates/utils/src/parser.rs +++ b/crates/utils/src/parser.rs @@ -1,4 +1,8 @@ -use std::{collections::HashMap, path::PathBuf, time::Duration}; +use std::{ + collections::{BTreeMap, HashMap}, + path::PathBuf, + time::Duration, +}; use clippy_utilities::OverflowArithmetic; use regex::Regex; @@ -70,6 +74,28 @@ pub fn parse_members(s: &str) -> Result>, ConfigPars Ok(map) } +/// Parse members from string like "0=addr1,1=addr2,2=addr3" +/// +/// # Errors +/// +/// Return error when pass wrong args +#[inline] +pub fn parse_membership(s: &str) -> Result, ConfigParseError> { + // TODO: currently reuse `parse_members`. Rewrite this after the old membership change is + // removed. 
+ let ms = parse_members(s)?; + ms.into_iter() + .map(|(k, v)| { + k.parse() + .ok() + .zip(v.into_iter().next()) + .ok_or(ConfigParseError::InvalidValue( + "parse membership error".to_owned(), + )) + }) + .collect::>() +} + /// Parse `ClusterRange` from the given string /// /// # Errors diff --git a/crates/xline-test-utils/src/lib.rs b/crates/xline-test-utils/src/lib.rs index b3135bf24..6c66a826e 100644 --- a/crates/xline-test-utils/src/lib.rs +++ b/crates/xline-test-utils/src/lib.rs @@ -1,4 +1,10 @@ -use std::{collections::HashMap, env::temp_dir, iter, path::PathBuf, sync::Arc}; +use std::{ + collections::{BTreeMap, HashMap}, + env::temp_dir, + iter, + path::PathBuf, + sync::Arc, +}; use futures::future::join_all; use rand::{distributions::Alphanumeric, thread_rng, Rng}; @@ -103,6 +109,13 @@ impl Cluster { .collect(), i == 0, InitialClusterState::New, + self.all_members_peer_urls + .clone() + .into_iter() + .enumerate() + .map(|(i, addr)| (i as u64, addr)) + .collect(), + i as u64, ); let server = Arc::new( @@ -171,6 +184,13 @@ impl Cluster { peers, false, InitialClusterState::Existing, + self.all_members_peer_urls + .clone() + .into_iter() + .enumerate() + .map(|(i, addr)| (i as u64, addr)) + .collect(), + idx as u64, ); let server = XlineServer::new( @@ -265,6 +285,7 @@ impl Cluster { Self::default_config_with_quota_and_rocks_path(path, quota) } + #[allow(clippy::too_many_arguments)] fn merge_config( base_config: &XlineServerConfig, name: String, @@ -273,6 +294,8 @@ impl Cluster { peers: HashMap>, is_leader: bool, initial_cluster_state: InitialClusterState, + initial_membership_info: BTreeMap, + node_id: u64, ) -> XlineServerConfig { let old_cluster = base_config.cluster(); let new_cluster = ClusterConfig::new( @@ -287,6 +310,8 @@ impl Cluster { *old_cluster.client_config(), *old_cluster.server_timeout(), initial_cluster_state, + initial_membership_info, + node_id, ); XlineServerConfig::new( new_cluster, diff --git a/crates/xline/src/server/xline_server.rs
b/crates/xline/src/server/xline_server.rs index 0ece5cdc6..b36fbd4ac 100644 --- a/crates/xline/src/server/xline_server.rs +++ b/crates/xline/src/server/xline_server.rs @@ -4,6 +4,7 @@ use anyhow::{anyhow, Result}; use clippy_utilities::{NumericCast, OverflowArithmetic}; use curp::{ client::ClientBuilder as CurpClientBuilder, + member::MembershipInfo, members::{get_cluster_info_from_remote, ClusterInfo}, rpc::{InnerProtocolServer, ProtocolServer}, server::{Rpc, StorageApi as _, DB as CurpDB}, @@ -73,6 +74,8 @@ pub(crate) type CurpServer = Rpc /// Xline server #[derive(Debug)] pub struct XlineServer { + /// Membership information + membership_info: MembershipInfo, /// Cluster information cluster_info: Arc, /// Cluster Config @@ -121,6 +124,11 @@ impl XlineServer { ) .await?, ); + let membership_info = MembershipInfo::new( + *cluster_config.node_id(), + cluster_config.initial_membership_info().clone(), + ); + Ok(Self { cluster_info, cluster_config, @@ -131,6 +139,7 @@ impl XlineServer { server_tls_config, task_manager: Arc::new(TaskManager::new()), curp_storage, + membership_info, }) } @@ -507,6 +516,7 @@ impl XlineServer { let curp_config = Arc::new(self.cluster_config.curp_config().clone()); let curp_server = CurpServer::new( + self.membership_info.clone(), Arc::clone(&self.cluster_info), *self.cluster_config.is_leader(), Arc::clone(&ce), diff --git a/crates/xline/src/utils/args.rs b/crates/xline/src/utils/args.rs index f8b6d44c8..928c405dc 100644 --- a/crates/xline/src/utils/args.rs +++ b/crates/xline/src/utils/args.rs @@ -1,4 +1,9 @@ -use std::{collections::HashMap, env, path::PathBuf, time::Duration}; +use std::{ + collections::{BTreeMap, HashMap}, + env, + path::PathBuf, + time::Duration, +}; use anyhow::Result; use clap::Parser; @@ -21,7 +26,7 @@ use utils::{ XlineServerConfig, }, parse_batch_bytes, parse_duration, parse_log_file, parse_log_level, parse_members, - parse_metrics_push_protocol, parse_rotation, parse_state, ConfigFileError, + parse_membership, 
parse_metrics_push_protocol, parse_rotation, parse_state, ConfigFileError, }; /// Xline server config path env name @@ -213,6 +218,12 @@ pub struct ServerArgs { /// Client private key path #[clap(long)] client_key_path: Option, + /// Cluster membership. eg: 0=192.168.x.x:8080,1=192.168.x.x:8081 + #[clap(long, value_parser = parse_membership)] + membership_info: BTreeMap, + /// The id of current node + #[clap(long)] + node_id: u64, } #[allow(clippy::too_many_lines)] // will be refactored in #604 @@ -291,6 +302,8 @@ impl From for XlineServerConfig { client_config, server_timeout, initial_cluster_state, + args.membership_info, + args.node_id, ); let log = LogConfig::new(args.log_file, args.log_rotate, args.log_level); let trace = TraceConfig::new( From 36e80b06f713ee53c39f60a60dbdded98f899b3c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 13 Aug 2024 08:38:25 +0800 Subject: [PATCH 149/322] feat: implement server member rpc connect Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 11 +++++++++- .../curp/src/server/curp_node/member_impl.rs | 22 ++++++++++++++++++- crates/curp/src/server/curp_node/mod.rs | 11 ++++++++++ crates/curp/src/server/raw_curp/mod.rs | 8 ++++++- 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 603c7b9a0..28ff6d5cb 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -10,6 +10,7 @@ use serde::Deserialize; use serde::Serialize; use crate::quorum::Joint; +use crate::rpc::connect::InnerConnectApiWrapper; /// The membership info, used to build the initial states #[derive(Debug, Clone)] @@ -41,11 +42,17 @@ pub(crate) struct NodeMembershipState { node_id: u64, /// The membership state of the cluster cluster_state: MembershipState, + #[allow(unused)] + /// The rpc connects of nodes + connects: BTreeMap, } impl NodeMembershipState { /// Creates a new `NodeMembershipState` 
with initial state - pub(crate) fn new(info: MembershipInfo) -> Self { + pub(crate) fn new( + info: MembershipInfo, + init_connects: BTreeMap, + ) -> Self { let MembershipInfo { node_id, init_members, @@ -62,6 +69,7 @@ impl NodeMembershipState { Self { node_id, cluster_state, + connects: init_connects, } } @@ -281,6 +289,7 @@ impl Membership { #[allow(unused)] /// The change of membership +#[derive(Clone)] pub(crate) enum Change { /// Adds learners AddLearner(Vec<(u64, String)>), diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 8fa4a1bac..168a3af03 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -9,6 +9,8 @@ use curp_external_api::cmd::CommandExecutor; use curp_external_api::role_change::RoleChange; use crate::member::Change; +use crate::rpc::connect::InnerConnectApiWrapper; +use crate::rpc::inner_connects; use crate::rpc::AddLearnerRequest; use crate::rpc::AddLearnerResponse; use crate::rpc::AddMemberRequest; @@ -73,10 +75,11 @@ impl, RC: RoleChange> CurpNode { /// Updates the membership based on the given change and waits for /// the proposal to be committed async fn update_and_wait(&self, change: Change) -> Result<(), CurpError> { - let configs = self.curp.generate_membership(change); + let configs = self.curp.generate_membership(change.clone()); if configs.is_empty() { return Err(CurpError::invalid_member_change()); } + //let new_connects = self.connect_node(&change).await; for config in configs { let propose_id = self.curp.update_membership(config); self.curp.wait_propose_ids(Some(propose_id)).await; @@ -84,4 +87,21 @@ impl, RC: RoleChange> CurpNode { Ok(()) } + + #[allow(unused)] + /// Connect to the nodes if new learners are added + fn connect_node( + &self, + change: &Change, + ) -> Option> { + let Change::AddLearner(ref nodes) = *change else { + return None; + }; + let nodes = nodes + .iter() + .map(|&(id, ref addr)| 
(id, vec![addr.clone()])) + .collect(); + + Some(inner_connects(nodes, self.curp.client_tls_config())) + } } diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index b7f9b4a18..b82955bf8 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -893,6 +893,16 @@ impl, RC: RoleChange> CurpNode { .collect(); let connects = rpc::inner_connects(cluster_info.peers_addrs(), client_tls_config.as_ref()).collect(); + let member_connects = rpc::inner_connects( + membership_info + .init_members + .clone() + .into_iter() + .map(|(id, addr)| (id, vec![addr])) + .collect(), + client_tls_config.as_ref(), + ) + .collect(); let cmd_board = Arc::new(RwLock::new(CommandBoard::new())); let lease_manager = Arc::new(RwLock::new(LeaseManager::new())); let last_applied = cmd_executor @@ -926,6 +936,7 @@ impl, RC: RoleChange> CurpNode { .resp_txs(Arc::new(Mutex::default())) .id_barrier(Arc::new(IdBarrier::new())) .membership_info(membership_info) + .member_connects(member_connects) .build_raw_curp() .map_err(|e| CurpError::internal(format!("build raw curp failed, {e}")))?, ); diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 81cdc0d9f..739d83315 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -12,6 +12,7 @@ use std::cmp; use std::cmp::min; +use std::collections::BTreeMap; use std::collections::HashMap; use std::collections::HashSet; use std::fmt::Debug; @@ -136,6 +137,8 @@ pub struct RawCurp { pub(super) struct RawCurpArgs { /// Membership information membership_info: MembershipInfo, + /// Member connects + member_connects: BTreeMap, /// Cluster information cluster_info: Arc, /// Current node is leader or not @@ -223,7 +226,10 @@ impl RawCurpBuilder { log, ctx, task_manager: args.task_manager, - ms: RwLock::new(NodeMembershipState::new(args.membership_info)), + ms: RwLock::new(NodeMembershipState::new( + 
args.membership_info, + args.member_connects, + )), }; if args.is_leader { From 28cd734ede9025a85c5c9a6f1b2d163488f88038 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:00:40 +0800 Subject: [PATCH 150/322] feat: implement server rpc connect update Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 24 ++++++++------ .../curp/src/server/curp_node/member_impl.rs | 20 ----------- .../curp/src/server/raw_curp/member_impl.rs | 33 +++++++++++++++---- 3 files changed, 41 insertions(+), 36 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 28ff6d5cb..19a136c56 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -92,6 +92,20 @@ impl NodeMembershipState { pub(crate) fn is_member(&self) -> bool { self.cluster().effective().contains(self.node_id()) } + + /// Updates the connects + pub(crate) fn update_connects(&mut self, new_connects: BTreeMap) { + self.connects.retain(|k, _| new_connects.contains_key(k)); + for (id, conn) in new_connects { + let _ignore = self.connects.entry(id).or_insert(conn); + } + } + + #[allow(unused)] + /// Get all rpc connects + pub(crate) fn connects(&self) -> &BTreeMap { + &self.connects + } } /// Membership state stored in current node @@ -107,16 +121,6 @@ pub(crate) struct MembershipState { #[allow(unused)] impl MembershipState { - /// Update the effective membership - pub(crate) fn update_effective(&mut self, config: Membership) { - self.effective = config; - } - - /// Update the committed membership - pub(crate) fn update_commit(&mut self, config: Membership) { - self.committed = config; - } - /// Append a membership change entry pub(crate) fn append(&mut self, index: LogIndex, membership: Membership) { self.index_effective = index; diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 168a3af03..e216646ef 100644 --- 
a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -9,8 +9,6 @@ use curp_external_api::cmd::CommandExecutor; use curp_external_api::role_change::RoleChange; use crate::member::Change; -use crate::rpc::connect::InnerConnectApiWrapper; -use crate::rpc::inner_connects; use crate::rpc::AddLearnerRequest; use crate::rpc::AddLearnerResponse; use crate::rpc::AddMemberRequest; @@ -79,7 +77,6 @@ impl, RC: RoleChange> CurpNode { if configs.is_empty() { return Err(CurpError::invalid_member_change()); } - //let new_connects = self.connect_node(&change).await; for config in configs { let propose_id = self.curp.update_membership(config); self.curp.wait_propose_ids(Some(propose_id)).await; @@ -87,21 +84,4 @@ impl, RC: RoleChange> CurpNode { Ok(()) } - - #[allow(unused)] - /// Connect to the nodes if new learners are added - fn connect_node( - &self, - change: &Change, - ) -> Option> { - let Change::AddLearner(ref nodes) = *change else { - return None; - }; - let nodes = nodes - .iter() - .map(|&(id, ref addr)| (id, vec![addr.clone()])) - .collect(); - - Some(inner_connects(nodes, self.curp.client_tls_config())) - } } diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index ff45a7305..f7bb33b29 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeMap; + use curp_external_api::cmd::Command; use curp_external_api::role_change::RoleChange; use curp_external_api::LogIndex; @@ -8,6 +10,8 @@ use crate::log_entry::LogEntry; use crate::member::Change; use crate::member::Membership; use crate::member::NodeMembershipState; +use crate::rpc::connect::InnerConnectApiWrapper; +use crate::rpc::inner_connects; use crate::rpc::ProposeId; use super::RawCurp; @@ -33,10 +37,13 @@ impl RawCurp { // FIXME: define the lock order of log and ms let mut log_w = self.log.write(); let mut ms_w 
= self.ms.write(); - ms_w.cluster_mut().update_effective(config.clone()); let st_r = self.st.read(); let propose_id = ProposeId(rand::random(), 0); - let _entry = log_w.push(st_r.term, propose_id, config); + let entry = log_w.push(st_r.term, propose_id, config.clone()); + let new_connects = self.build_connects(&config); + ms_w.cluster_mut().append(entry.index, config); + ms_w.update_connects(new_connects); + propose_id } @@ -51,8 +58,7 @@ impl RawCurp { I: IntoIterator, { let mut ms_w = self.ms.write(); - let ms = ms_w.cluster_mut(); - ms.truncate(truncate_at); + ms_w.cluster_mut().truncate(truncate_at); let configs = entries.into_iter().filter_map(|entry| { let entry = entry.as_ref(); if let EntryData::Member(ref m) = entry.entry_data { @@ -62,8 +68,10 @@ impl RawCurp { } }); for (index, config) in configs { - ms.append(index, config); - ms.commit(commit_index.min(index)); + let new_connects = self.build_connects(&config); + ms_w.update_connects(new_connects); + ms_w.cluster_mut().append(index, config); + ms_w.cluster_mut().commit(commit_index.min(index)); } self.update_role(&ms_w); @@ -86,4 +94,17 @@ impl RawCurp { st_w.role = Role::Learner; } } + + /// Creates connections for new membership configuration. 
+ /// + /// Returns a map of connects that can be used to update the existing connections + fn build_connects(&self, config: &Membership) -> BTreeMap { + let nodes = config + .nodes + .iter() + .map(|(id, addr)| (*id, vec![addr.clone()])) + .collect(); + + inner_connects(nodes, self.client_tls_config()).collect() + } } From 803ee7365b9eb128032c8f5aee5706af62a30eda Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 14 Aug 2024 09:01:27 +0800 Subject: [PATCH 151/322] feat(raw_curp): implement sync node task for new membership Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 28 +++++++++-- .../curp/src/server/curp_node/member_impl.rs | 16 ++++++- crates/curp/src/server/curp_node/mod.rs | 46 +++++++++++++++++-- .../curp/src/server/raw_curp/member_impl.rs | 45 ++++++++++++++++-- crates/curp/src/server/raw_curp/mod.rs | 28 +++++++++-- crates/curp/src/server/raw_curp/tests.rs | 8 ++-- 6 files changed, 152 insertions(+), 19 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 19a136c56..e072d4905 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -94,11 +94,31 @@ impl NodeMembershipState { } /// Updates the connects - pub(crate) fn update_connects(&mut self, new_connects: BTreeMap) { - self.connects.retain(|k, _| new_connects.contains_key(k)); - for (id, conn) in new_connects { - let _ignore = self.connects.entry(id).or_insert(conn); + /// + /// Returns a pair of (removed, added) connects + pub(crate) fn update_connects( + &mut self, + new_connects: &BTreeMap, + ) -> ( + BTreeMap, + BTreeMap, + ) { + /// Alias + type Map = BTreeMap; + let diff = |x: &Map, y: &Map| { + x.iter() + .filter_map(|(k, c)| (!y.contains_key(k)).then_some((*k, c.clone()))) + .collect::>() + }; + let removed = diff(&self.connects, new_connects); + let added = diff(new_connects, &self.connects); + + for k in removed.keys() { + let _ignore = self.connects.remove(k); } + 
self.connects.extend(added.clone()); + + (removed, added) } #[allow(unused)] diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index e216646ef..dbca7eb91 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -4,9 +4,12 @@ clippy::needless_pass_by_value )] // TODO: remove this after implemented +use std::sync::Arc; + use curp_external_api::cmd::Command; use curp_external_api::cmd::CommandExecutor; use curp_external_api::role_change::RoleChange; +use utils::task_manager::tasks::TaskName; use crate::member::Change; use crate::rpc::AddLearnerRequest; @@ -77,8 +80,19 @@ impl, RC: RoleChange> CurpNode { if configs.is_empty() { return Err(CurpError::invalid_member_change()); } + let spawn_sync = |sync_event, remove_event, connect| { + self.curp.task_manager().spawn(TaskName::SyncFollower, |n| { + Self::sync_follower_task( + Arc::clone(&self.curp), + connect, + sync_event, + Arc::clone(&remove_event), + n, + ) + }); + }; for config in configs { - let propose_id = self.curp.update_membership(config); + let propose_id = self.curp.update_membership(config, spawn_sync); self.curp.wait_propose_ids(Some(propose_id)).await; } diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index b82955bf8..5c9b8f39a 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -447,7 +447,17 @@ impl, RC: RoleChange> CurpNode { req: &AppendEntriesRequest, ) -> Result { let entries = req.entries()?; - + let sync_spawner = |sync_event, remove_event, connect| { + self.curp.task_manager().spawn(TaskName::SyncFollower, |n| { + Self::sync_follower_task( + Arc::clone(&self.curp), + connect, + sync_event, + Arc::clone(&remove_event), + n, + ) + }); + }; let result = self.curp.handle_append_entries( req.term, req.leader_id, @@ -455,6 +465,7 @@ impl, RC: RoleChange> CurpNode { req.prev_log_term, 
entries, req.leader_commit, + sync_spawner, ); let resp = match result { Ok((term, to_persist)) => { @@ -773,7 +784,7 @@ impl, RC: RoleChange> CurpNode { /// This task will keep a follower up-to-data when current node is leader, /// and it will wait for `leader_event` if current node is not leader #[allow(clippy::arithmetic_side_effects, clippy::ignored_unit_patterns)] // tokio select internal triggered - async fn sync_follower_task( + pub(crate) async fn sync_follower_task( curp: Arc>, connect: InnerConnectApiWrapper, sync_event: Arc, @@ -891,6 +902,14 @@ impl, RC: RoleChange> CurpNode { .into_iter() .map(|server_id| (server_id, Arc::new(Event::new()))) .collect(); + let remove_events = Arc::new(Mutex::new( + membership_info + .init_members + .keys() + .map(|id| (*id, Arc::new(Event::new()))) + .collect(), + )); + let connects = rpc::inner_connects(cluster_info.peers_addrs(), client_tls_config.as_ref()).collect(); let member_connects = rpc::inner_connects( @@ -922,6 +941,7 @@ impl, RC: RoleChange> CurpNode { .lease_manager(Arc::clone(&lease_manager)) .cfg(Arc::clone(&curp_cfg)) .sync_events(sync_events) + .remove_events(remove_events) .role_change(role_change) .task_manager(Arc::clone(&task_manager)) .connects(connects) @@ -985,6 +1005,24 @@ impl, RC: RoleChange> CurpNode { }); let mut remove_events = HashMap::new(); + curp.with_member_connects(|connects| { + for c in connects.values() { + let sync_event = curp.sync_event(c.id()); + let remove_event = Arc::new(Event::new()); + + task_manager.spawn(TaskName::SyncFollower, |n| { + Self::sync_follower_task( + Arc::clone(&curp), + c.clone(), + sync_event, + Arc::clone(&remove_event), + n, + ) + }); + _ = remove_events.insert(c.id(), remove_event); + } + }); + // TODO: Remove this after new membership implementation for c in curp.connects() { let sync_event = curp.sync_event(c.id()); let remove_event = Arc::new(Event::new()); @@ -1332,7 +1370,7 @@ mod tests { )) }; let s2_id = 
curp.cluster().get_id_by_name("S2").unwrap(); - curp.handle_append_entries(2, s2_id, 0, 0, vec![], 0) + curp.handle_append_entries(2, s2_id, 0, 0, vec![], 0, |_, _, _| {}) .unwrap(); let mut mock_connect1 = MockInnerConnectApi::default(); @@ -1389,7 +1427,7 @@ mod tests { vec!["address".to_owned()], )]); - curp.handle_append_entries(1, s2_id, 0, 0, vec![], 0) + curp.handle_append_entries(1, s2_id, 0, 0, vec![], 0, |_, _, _| {}) .unwrap(); let mut mock_connect1 = MockInnerConnectApi::default(); diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index f7bb33b29..27c925bf0 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -1,8 +1,10 @@ use std::collections::BTreeMap; +use std::sync::Arc; use curp_external_api::cmd::Command; use curp_external_api::role_change::RoleChange; use curp_external_api::LogIndex; +use event_listener::Event; use rand::Rng; use crate::log_entry::EntryData; @@ -33,7 +35,10 @@ impl RawCurp { } /// Updates the membership config - pub(crate) fn update_membership(&self, config: Membership) -> ProposeId { + pub(crate) fn update_membership(&self, config: Membership, spawn_sync: F) -> ProposeId + where + F: Fn(Arc, Arc, InnerConnectApiWrapper), + { // FIXME: define the lock order of log and ms let mut log_w = self.log.write(); let mut ms_w = self.ms.write(); @@ -42,20 +47,23 @@ impl RawCurp { let entry = log_w.push(st_r.term, propose_id, config.clone()); let new_connects = self.build_connects(&config); ms_w.cluster_mut().append(entry.index, config); - ms_w.update_connects(new_connects); + let (removed, added) = ms_w.update_connects(&new_connects); + self.update_node_sync(removed, added, spawn_sync); propose_id } /// Append membership entries - pub(crate) fn append_membership( + pub(crate) fn append_membership( &self, entries: I, truncate_at: LogIndex, commit_index: LogIndex, + spawn_sync: F, ) where E: AsRef>, I: IntoIterator, + F: 
Fn(Arc, Arc, InnerConnectApiWrapper), { let mut ms_w = self.ms.write(); ms_w.cluster_mut().truncate(truncate_at); @@ -69,7 +77,8 @@ impl RawCurp { }); for (index, config) in configs { let new_connects = self.build_connects(&config); - ms_w.update_connects(new_connects); + let (removed, added) = ms_w.update_connects(&new_connects); + self.update_node_sync(removed, added, &spawn_sync); ms_w.cluster_mut().append(index, config); ms_w.cluster_mut().commit(commit_index.min(index)); } @@ -107,4 +116,32 @@ impl RawCurp { inner_connects(nodes, self.client_tls_config()).collect() } + + /// Updates the background task of node sync + /// TODO: member persistent + fn update_node_sync( + &self, + removed: BTreeMap, + added: BTreeMap, + spawn_sync: F, + ) where + F: Fn(Arc, Arc, InnerConnectApiWrapper), + { + let mut remove_events_l = self.ctx.remove_events.lock(); + for (id, connect) in added { + let sync_event = Arc::new(Event::new()); + let remove_event = Arc::new(Event::new()); + _ = self.ctx.sync_events.insert(id, Arc::new(Event::new())); + let _ignore = remove_events_l.insert(id, Arc::new(Event::new())); + spawn_sync(sync_event, remove_event, connect); + } + for (id, _connect) in removed { + _ = self.ctx.sync_events.remove(&id); + assert!( + remove_events_l.remove(&id).map(|e| e.notify(1)).is_some(), + "id doesn't exist" + ); + // TODO: update persistent membership + } + } } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 739d83315..9ffe8855e 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -155,6 +155,8 @@ pub(super) struct RawCurpArgs { task_manager: Arc, /// Sync events sync_events: DashMap>, + /// Followers remove event trigger + remove_events: Arc>>>, /// Connects of peers connects: DashMap, /// curp storage @@ -202,6 +204,7 @@ impl RawCurpBuilder { .lm(args.lease_manager) .cfg(args.cfg) .sync_events(args.sync_events) + .remove_events(args.remove_events) 
.role_change(args.role_change) .connects(args.connects) .curp_storage(args.curp_storage) @@ -358,6 +361,8 @@ struct Context { election_tick: AtomicU8, /// Followers sync event trigger sync_events: DashMap>, + /// Followers remove event trigger + remove_events: Arc>>>, /// Become leader event #[builder(setter(skip))] leader_event: Arc, @@ -423,6 +428,10 @@ impl ContextBuilder { Some(value) => value, None => return Err(ContextBuilderError::UninitializedField("sync_events")), }, + remove_events: match self.remove_events.take() { + Some(value) => value, + None => return Err(ContextBuilderError::UninitializedField("remove_events")), + }, leader_event: Arc::new(Event::new()), role_change: match self.role_change.take() { Some(value) => value, @@ -788,7 +797,8 @@ impl RawCurp { /// Return `Ok(term, entries)` if succeeds /// Return `Err(term, hint_index)` if fails #[allow(clippy::needless_pass_by_value)] // TODO: avoid cloning of `entries` - pub(super) fn handle_append_entries( + #[allow(clippy::too_many_arguments)] // FIXME: reduce the number of arguments + pub(super) fn handle_append_entries( &self, term: u64, leader_id: ServerId, @@ -796,7 +806,11 @@ impl RawCurp { prev_log_term: u64, entries: Vec>, leader_commit: LogIndex, - ) -> Result, AppendEntriesFailure> { + spawn_sync: F, + ) -> Result, AppendEntriesFailure> + where + F: Fn(Arc, Arc, InnerConnectApiWrapper), + { if entries.is_empty() { trace!( "{} received heartbeat from {}: term({}), commit({}), prev_log_index({}), prev_log_term({})", @@ -836,7 +850,7 @@ impl RawCurp { let (to_persist, cc_entries, fallback_indexes, truncate_at) = log_w .try_append_entries(entries.clone(), prev_log_index, prev_log_term) .map_err(|_ig| (term, log_w.commit_index + 1))?; - self.append_membership(&entries, truncate_at, leader_commit); + self.append_membership(&entries, truncate_at, leader_commit, spawn_sync); // fallback overwritten conf change entries for idx in fallback_indexes.iter().sorted().rev() { let info = 
log_w.fallback_contexts.remove(idx).unwrap_or_else(|| { @@ -1624,6 +1638,14 @@ impl RawCurp { &self.ctx.connects } + /// Get all connects + pub(super) fn with_member_connects(&self, mut op: F) -> R + where + F: FnMut(&BTreeMap) -> R, + { + op(self.ms.read().connects()) + } + /// Insert connect pub(super) fn insert_connect(&self, connect: InnerConnectApiWrapper) { let _ig = self.ctx.connects.insert(connect.id(), connect); diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index d2eda551a..0cdf7a53b 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -265,7 +265,7 @@ fn handle_ae_will_calibrate_term() { curp.update_to_term_and_become_follower(&mut *curp.st.write(), 1); let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); - let result = curp.handle_append_entries(2, s2_id, 0, 0, vec![], 0); + let result = curp.handle_append_entries(2, s2_id, 0, 0, vec![], 0, |_, _, _| {}); assert!(result.is_ok()); let st_r = curp.st.read(); @@ -282,7 +282,7 @@ fn handle_ae_will_set_leader_id() { curp.update_to_term_and_become_follower(&mut *curp.st.write(), 1); let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); - let result = curp.handle_append_entries(1, s2_id, 0, 0, vec![], 0); + let result = curp.handle_append_entries(1, s2_id, 0, 0, vec![], 0, |_, _, _| {}); assert!(result.is_ok()); let st_r = curp.st.read(); @@ -299,7 +299,7 @@ fn handle_ae_will_reject_wrong_term() { curp.update_to_term_and_become_follower(&mut *curp.st.write(), 1); let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); - let result = curp.handle_append_entries(0, s2_id, 0, 0, vec![], 0); + let result = curp.handle_append_entries(0, s2_id, 0, 0, vec![], 0, |_, _, _| {}); assert!(result.is_err()); assert_eq!(result.unwrap_err().0, 1); } @@ -324,6 +324,7 @@ fn handle_ae_will_reject_wrong_log() { Arc::new(TestCommand::default()), )], 0, + |_, _, _| {}, ); assert_eq!(result, Err((1, 1))); } @@ -440,6 
+441,7 @@ fn handle_vote_will_reject_outdated_candidate() { Arc::new(TestCommand::default()), )], 0, + |_, _, _| {}, ); assert!(result.is_ok()); curp.st.write().leader_id = None; From 9a9882253b89677e82db9925672a0d966f862915 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 14 Aug 2024 16:29:31 +0800 Subject: [PATCH 152/322] refactor: build sync_events using membership_info Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 5c9b8f39a..5451d9cae 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -897,10 +897,10 @@ impl, RC: RoleChange> CurpNode { sps: Vec>, ucps: Vec>, ) -> Result { - let sync_events = cluster_info - .peers_ids() - .into_iter() - .map(|server_id| (server_id, Arc::new(Event::new()))) + let sync_events = membership_info + .init_members + .keys() + .map(|id| (*id, Arc::new(Event::new()))) .collect(); let remove_events = Arc::new(Mutex::new( membership_info From bacd0a3ae39b970e62bf37ccbc057b683cfd5643 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:19:44 +0800 Subject: [PATCH 153/322] feat: implement `voter_connects` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 20 ++++++++++++++++++++ crates/curp/src/server/curp_node/mod.rs | 4 ++-- crates/curp/src/server/raw_curp/mod.rs | 11 +++-------- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index e072d4905..be3c86e0e 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -4,12 +4,14 @@ use std::collections::BTreeSet; use std::collections::HashSet; use std::hash::Hash; use std::iter; +use std::sync::Arc; use 
curp_external_api::LogIndex; use serde::Deserialize; use serde::Serialize; use crate::quorum::Joint; +use crate::rpc::connect::InnerConnectApi; use crate::rpc::connect::InnerConnectApiWrapper; /// The membership info, used to build the initial states @@ -126,6 +128,24 @@ impl NodeMembershipState { pub(crate) fn connects(&self) -> &BTreeMap { &self.connects } + + /// Get all voter connects + pub(crate) fn voter_connects(&self) -> BTreeMap> { + self.cluster() + .effective() + .members() + .map(|(id, _)| { + ( + id, + Arc::clone( + self.connects + .get(&id) + .unwrap_or_else(|| unreachable!("connect should always exist")), + ), + ) + }) + .collect() + } } /// Membership state stored in current node diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 5451d9cae..f4b2aca69 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -1066,7 +1066,7 @@ impl, RC: RoleChange> CurpNode { let voters_connects = curp.voters_connects(); let resps = voters_connects .into_iter() - .map(|connect| { + .map(|(id, connect)| { let req = VoteRequest::new( vote.term, vote.candidate_id, @@ -1076,7 +1076,7 @@ impl, RC: RoleChange> CurpNode { ); async move { let resp = connect.vote(req, rpc_timeout).await; - (connect.id(), resp) + (id, resp) } }) .collect::>() diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 9ffe8855e..784d0828d 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1664,14 +1664,9 @@ impl RawCurp { } /// Get voters connects - pub(super) fn voters_connects(&self) -> Vec> { - let cst_r = self.cst.lock(); - let voters = cst_r.config.voters(); - self.connects() - .iter() - .filter(|c| voters.contains(c.key())) - .map(|c| Arc::clone(c.value())) - .collect() + pub(super) fn voters_connects(&self) -> BTreeMap> { + let ms_r = self.ms.read(); + ms_r.voter_connects() } /// Get transferee From 
a6c88487b090a910f86b78f66c7c4e1961e823b7 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:29:40 +0800 Subject: [PATCH 154/322] refactor: remove the `Config` type in vote Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/tests.rs | 2 - crates/curp/src/rpc/mod.rs | 5 - crates/curp/src/server/raw_curp/mod.rs | 45 ++------- crates/curp/src/server/raw_curp/state.rs | 121 +---------------------- crates/curp/src/server/raw_curp/tests.rs | 5 - 5 files changed, 13 insertions(+), 165 deletions(-) diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 37dd6317c..20a51e57e 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -214,7 +214,6 @@ async fn test_unary_propose_fast_path_fallback_slow_path() { async fn test_unary_propose_return_early_err() { for early_err in [ CurpError::shutting_down(), - CurpError::invalid_config(), CurpError::node_already_exists(), CurpError::node_not_exist(), CurpError::learner_not_catch_up(), @@ -257,7 +256,6 @@ async fn test_unary_propose_return_early_err() { async fn test_retry_propose_return_no_retry_error() { for early_err in [ CurpError::shutting_down(), - CurpError::invalid_config(), CurpError::node_already_exists(), CurpError::node_not_exist(), CurpError::learner_not_catch_up(), diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 6639ccc50..004a71a28 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -636,11 +636,6 @@ impl CurpError { Self::ExpiredClientId(()) } - /// `InvalidConfig` error - pub(crate) fn invalid_config() -> Self { - Self::InvalidConfig(()) - } - /// `NodeNotExists` error pub(crate) fn node_not_exist() -> Self { Self::NodeNotExists(()) diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 784d0828d..13108e7e5 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ 
b/crates/curp/src/server/raw_curp/mod.rs @@ -195,7 +195,7 @@ impl RawCurpBuilder { args.cfg.candidate_timeout_ticks, )); let lst = LeaderState::new(&args.cluster_info.peers_ids()); - let cst = Mutex::new(CandidateState::new(args.cluster_info.all_ids().into_iter())); + let cst = Mutex::new(CandidateState::new()); let log = RwLock::new(Log::new(args.cfg.batch_max_size, args.cfg.log_entries_cap)); let ctx = Context::builder() @@ -1484,31 +1484,25 @@ impl RawCurp { .copied() .chain([self.id()]) .collect::>(); - let mut config = self.cst.map_lock(|cst_l| cst_l.config.clone()); let node_id = conf_change.node_id; match conf_change.change_type() { - ConfChangeType::Add => { - if !statuses_ids.insert(node_id) || !config.insert(node_id, false) { + ConfChangeType::Add | ConfChangeType::AddLearner => { + if !statuses_ids.insert(node_id) { return Err(CurpError::node_already_exists()); } } ConfChangeType::Remove => { - if !statuses_ids.remove(&node_id) || !config.remove(node_id) { + if !statuses_ids.remove(&node_id) { return Err(CurpError::node_not_exist()); } } ConfChangeType::Update => { - if statuses_ids.get(&node_id).is_none() || !config.contains(node_id) { + if statuses_ids.get(&node_id).is_none() { return Err(CurpError::node_not_exist()); } } - ConfChangeType::AddLearner => { - if !statuses_ids.insert(node_id) || !config.insert(node_id, true) { - return Err(CurpError::node_already_exists()); - } - } ConfChangeType::Promote => { - if statuses_ids.get(&node_id).is_none() || !config.contains(node_id) { + if statuses_ids.get(&node_id).is_none() { metrics::get() .learner_promote_failed .add(1, &[KeyValue::new("reason", "learner not exist")]); @@ -1527,12 +1521,7 @@ impl RawCurp { } } } - let mut all_nodes = HashSet::new(); - all_nodes.extend(config.voters()); - all_nodes.extend(&config.learners); - if all_nodes != statuses_ids || !config.voters().is_disjoint(&config.learners) { - return Err(CurpError::invalid_config()); - } + Ok(()) } @@ -1574,8 +1563,6 @@ impl RawCurp { 
#[allow(clippy::explicit_auto_deref)] // Avoid compiler complaint about `Dashmap::Ref` type let fallback_change = match conf_change.change_type() { ConfChangeType::Add | ConfChangeType::AddLearner => { - self.cst - .map_lock(|mut cst_l| _ = cst_l.config.remove(node_id)); self.lst.remove(node_id); _ = self.ctx.sync_events.remove(&node_id); let _ig1 = self.ctx.cluster_info.remove(&node_id); @@ -1585,8 +1572,6 @@ impl RawCurp { } ConfChangeType::Remove => { let member = Member::new(node_id, name, old_addrs.clone(), [], is_learner); - self.cst - .map_lock(|mut cst_l| _ = cst_l.config.insert(node_id, is_learner)); self.lst.insert(node_id, is_learner); _ = self.ctx.sync_events.insert(node_id, Arc::new(Event::new())); let _ig1 = self.ctx.curp_storage.put_member(&member); @@ -1606,10 +1591,6 @@ impl RawCurp { Some(ConfChange::update(node_id, old_addrs)) } ConfChangeType::Promote => { - self.cst.map_lock(|mut cst_l| { - _ = cst_l.config.remove(node_id); - _ = cst_l.config.insert(node_id, true); - }); self.ctx.cluster_info.demote(node_id); self.lst.demote(node_id); let m = self.ctx.cluster_info.get(&node_id).unwrap_or_else(|| { @@ -2002,13 +1983,11 @@ impl RawCurp { /// switching fn switch_config(&self, conf_change: ConfChange) -> Option<(Vec, String, bool)> { let node_id = conf_change.node_id; - let mut cst_l = self.cst.lock(); #[allow(clippy::explicit_auto_deref)] // Avoid compiler complaint about `Dashmap::Ref` type let (modified, fallback_info) = match conf_change.change_type() { ConfChangeType::Add | ConfChangeType::AddLearner => { let is_learner = matches!(conf_change.change_type(), ConfChangeType::AddLearner); let member = Member::new(node_id, "", conf_change.address.clone(), [], is_learner); - _ = cst_l.config.insert(node_id, is_learner); self.lst.insert(node_id, is_learner); _ = self.ctx.sync_events.insert(node_id, Arc::new(Event::new())); let _ig = self.ctx.curp_storage.put_member(&member); @@ -2016,7 +1995,6 @@ impl RawCurp { (m.is_none(), Some((vec![], 
String::new(), is_learner))) } ConfChangeType::Remove => { - _ = cst_l.config.remove(node_id); self.lst.remove(node_id); _ = self.ctx.sync_events.remove(&node_id); _ = self.ctx.connects.remove(&node_id); @@ -2047,8 +2025,6 @@ impl RawCurp { ) } ConfChangeType::Promote => { - _ = cst_l.config.learners.remove(&node_id); - _ = cst_l.config.insert(node_id, false); self.lst.promote(node_id); let modified = self.ctx.cluster_info.promote(node_id); let m = self.ctx.cluster_info.get(&node_id).unwrap_or_else(|| { @@ -2067,12 +2043,7 @@ impl RawCurp { .unwrap_or_else(|_e| unreachable!("change_rx should not be dropped")); // TODO: We could wrap lst inside a role checking to prevent accidental lst // mutation - if self.is_leader() - && self - .lst - .get_transferee() - .is_some_and(|transferee| !cst_l.config.voters().contains(&transferee)) - { + if self.is_leader() && self.lst.get_transferee().is_some() { self.lst.reset_transferee(); } fallback_info diff --git a/crates/curp/src/server/raw_curp/state.rs b/crates/curp/src/server/raw_curp/state.rs index f1504888c..ff9e2a48d 100644 --- a/crates/curp/src/server/raw_curp/state.rs +++ b/crates/curp/src/server/raw_curp/state.rs @@ -1,5 +1,5 @@ use std::{ - collections::{HashMap, HashSet}, + collections::HashMap, pin::Pin, sync::atomic::{AtomicBool, AtomicU64, Ordering}, }; @@ -17,7 +17,7 @@ use madsim::rand::{thread_rng, Rng}; use tracing::{debug, warn}; use super::Role; -use crate::{members::ServerId, quorum, rpc::PoolEntry, LogIndex}; +use crate::{members::ServerId, rpc::PoolEntry, LogIndex}; /// Curp state #[derive(Debug)] @@ -50,8 +50,6 @@ pub(super) struct State { pub(super) struct CandidateState { /// Collected speculative pools, used for recovery pub(super) sps: HashMap>>, - /// config in current cluster - pub(super) config: Config, /// Votes received in the election pub(super) votes_received: HashMap, } @@ -286,17 +284,16 @@ impl LeaderState { impl CandidateState { /// Create a new `CandidateState` - pub(super) fn new(voters: 
impl Iterator) -> Self { + pub(super) fn new() -> Self { Self { sps: HashMap::new(), - config: Config::new(voters), votes_received: HashMap::new(), } } /// Check if the candidate has won the election pub(super) fn check_vote(&self) -> VoteResult { - self.config.majority_config.check_vote(&self.votes_received) + unimplemented!() } } @@ -306,98 +303,6 @@ trait ClusterConfig { fn check_vote(&self, votes_received: &HashMap) -> VoteResult; } -/// `MajorityConfig` is a set of IDs that uses majority quorums to make decisions. -#[derive(Debug, Clone)] -pub(super) struct MajorityConfig { - /// The voters in the cluster - voters: HashSet, -} - -/// Cluster config -#[derive(Debug, Clone)] -pub(super) struct Config { - /// The majority config - pub(super) majority_config: MajorityConfig, - /// The learners in the cluster - pub(super) learners: HashSet, -} - -impl Config { - /// Create a new `Config` - pub(super) fn new(voters: impl Iterator) -> Self { - Self { - majority_config: MajorityConfig::new(voters), - learners: HashSet::new(), - } - } - - /// Get voters of current config - pub(super) fn voters(&self) -> &HashSet { - &self.majority_config.voters - } - - /// Insert a voter - pub(super) fn insert(&mut self, id: ServerId, is_learner: bool) -> bool { - if is_learner { - self.learners.insert(id) - } else { - self.majority_config.voters.insert(id) - } - } - - /// Remove a node - pub(super) fn remove(&mut self, id: ServerId) -> bool { - let res1 = self.majority_config.voters.remove(&id); - let res2 = self.learners.remove(&id); - debug_assert!( - res1 ^ res2, - "a node should not exist in both voters and learners" - ); - res1 || res2 - } - - /// Check if a server exists - pub(super) fn contains(&self, id: ServerId) -> bool { - self.majority_config.voters.contains(&id) || self.learners.contains(&id) - } -} - -impl MajorityConfig { - /// Create a new `MajorityConfig` - fn new(voters: impl Iterator) -> Self { - Self { - voters: voters.collect(), - } - } -} - -impl ClusterConfig 
for MajorityConfig { - fn check_vote(&self, votes_received: &HashMap) -> VoteResult { - if self.voters.is_empty() { - return VoteResult::Won; - } - - let mut voted_cnt = 0; - let mut missing_cnt = 0; - for id in &self.voters { - match votes_received.get(id) { - Some(&true) => voted_cnt += 1, - None => missing_cnt += 1, - _ => {} - } - } - - let quorum = quorum(self.voters.len()); - if voted_cnt >= quorum { - return VoteResult::Won; - } - if voted_cnt + missing_cnt >= quorum { - return VoteResult::Pending; - } - VoteResult::Lost - } -} - /// Result of a vote #[derive(Debug, PartialEq)] pub(super) enum VoteResult { @@ -412,24 +317,8 @@ pub(super) enum VoteResult { #[cfg(test)] mod test { - use curp_test_utils::test_cmd::TestCommand; - - use super::*; - #[test] fn check_vote_should_return_right_vote_result() { - let servers = vec![1, 2, 3, 4, 5]; - let mut cst = CandidateState::::new(servers.into_iter()); - - cst.votes_received = - HashMap::from([(1, true), (2, true), (3, true), (4, false), (5, false)]); - assert_eq!(cst.check_vote(), VoteResult::Won); - - cst.votes_received = - HashMap::from([(1, true), (2, true), (3, false), (4, false), (5, false)]); - assert_eq!(cst.check_vote(), VoteResult::Lost); - - cst.votes_received = HashMap::from([(1, true), (2, true), (3, false), (4, false)]); - assert_eq!(cst.check_vote(), VoteResult::Pending); + unimplemented!() } } diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 0cdf7a53b..8663c0519 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -29,7 +29,6 @@ impl RawCurp { self.cluster().all_members().contains_key(&id) && self.ctx.sync_events.contains_key(&id) && self.lst.get_all_statuses().contains_key(&id) - && self.cst.lock().config.contains(id) } #[allow(clippy::mem_forget)] // we should prevent the channel from being dropped @@ -132,10 +131,6 @@ impl RawCurp { .all_members() .get(&node_id) .is_some_and(|m| m.is_learner 
== is_learner) - && self.cst.map_lock(|cst_l| { - cst_l.config.learners.contains(&node_id) == is_learner - && cst_l.config.voters().contains(&1) != is_learner - }) } } From d4554d77aa57e071764836d47ad1f594d282081b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 14 Aug 2024 18:15:22 +0800 Subject: [PATCH 155/322] refactor: implement vote result check in new membership change Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 6 ++++++ crates/curp/src/server/raw_curp/mod.rs | 27 +++++++++++++++--------- crates/curp/src/server/raw_curp/state.rs | 24 +-------------------- 3 files changed, 24 insertions(+), 33 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index be3c86e0e..20eb05073 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -11,6 +11,7 @@ use serde::Deserialize; use serde::Serialize; use crate::quorum::Joint; +use crate::quorum::QuorumSet; use crate::rpc::connect::InnerConnectApi; use crate::rpc::connect::InnerConnectApiWrapper; @@ -146,6 +147,11 @@ impl NodeMembershipState { }) .collect() } + + /// Returns `true` if the given set of nodes forms a quorum + pub(crate) fn check_quorum + Clone>(&self, nodes: I) -> bool { + self.cluster().effective().as_joint().is_quorum(nodes) + } } /// Membership state stored in current node diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 13108e7e5..c492abe77 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -89,7 +89,6 @@ use crate::rpc::Redirect; use crate::server::cmd_board::CmdBoardRef; use crate::server::metrics; use crate::server::raw_curp::log::FallbackContext; -use crate::server::raw_curp::state::VoteResult; use crate::snapshot::Snapshot; use crate::snapshot::SnapshotMeta; use crate::LogIndex; @@ -508,11 +507,12 @@ impl RawCurp { let mut st_w = 
RwLockUpgradableReadGuard::upgrade(st_r); let mut cst_l = self.cst.lock(); let log_r = self.log.upgradable_read(); + let ms_r = self.ms.read(); match st_w.role { Role::Follower | Role::PreCandidate => { - self.become_pre_candidate(&mut st_w, &mut cst_l, log_r) + self.become_pre_candidate(&mut st_w, &mut cst_l, log_r, &ms_r) } - Role::Candidate => self.become_candidate(&mut st_w, &mut cst_l, log_r), + Role::Candidate => self.become_candidate(&mut st_w, &mut cst_l, log_r, &ms_r), Role::Leader => { self.lst.reset_transferee(); None @@ -1102,7 +1102,9 @@ impl RawCurp { "a server can't vote twice" ); - if !matches!(cst_w.check_vote(), VoteResult::Won) { + let ms_r = self.ms.read(); + // TODO: implement early return if vote fail is definite + if !ms_r.check_quorum(cst_w.votes_received.keys().copied()) { return Ok(false); } @@ -1163,12 +1165,14 @@ impl RawCurp { debug!("{}'s pre vote is granted by server {}", self.id(), id); - if !matches!(cst_w.check_vote(), VoteResult::Won) { + let ms_r = self.ms.read(); + // TODO: implement early return if vote fail is definite + if !ms_r.check_quorum(cst_w.votes_received.keys().copied()) { return Ok(None); } let log_r = self.log.upgradable_read(); - Ok(self.become_candidate(&mut st_w, &mut cst_w, log_r)) + Ok(self.become_candidate(&mut st_w, &mut cst_w, log_r, &ms_r)) } /// Verify `install_snapshot` request @@ -1290,7 +1294,8 @@ impl RawCurp { } let mut cst_l = self.cst.lock(); let log_r = self.log.upgradable_read(); - self.become_candidate(&mut st_w, &mut cst_l, log_r) + let ms_r = self.ms.read(); + self.become_candidate(&mut st_w, &mut cst_l, log_r, &ms_r) } } @@ -1702,6 +1707,7 @@ impl RawCurp { st: &mut State, cst: &mut CandidateState, log: RwLockUpgradableReadGuard<'_, Log>, + ms: &NodeMembershipState, ) -> Option { let prev_role = st.role; assert_ne!(prev_role, Role::Leader, "leader can't start election"); @@ -1727,8 +1733,8 @@ impl RawCurp { debug!("{}'s vote is granted by server {}", self.id(), self.id()); cst.votes_received = 
HashMap::from([(self.id(), true)]); - if matches!(cst.check_vote(), VoteResult::Won) { - self.become_candidate(st, cst, log) + if ms.check_quorum(cst.votes_received.keys().copied()) { + self.become_candidate(st, cst, log, ms) } else { Some(Vote { term: st.term.overflow_add(1), @@ -1746,6 +1752,7 @@ impl RawCurp { st: &mut State, cst: &mut CandidateState, log: RwLockUpgradableReadGuard<'_, Log>, + ms: &NodeMembershipState, ) -> Option { let prev_role = st.role; assert_ne!(prev_role, Role::Leader, "leader can't start election"); @@ -1770,7 +1777,7 @@ impl RawCurp { cst.votes_received = HashMap::from([(self.id(), true)]); cst.sps = HashMap::from([(self.id(), self_sp)]); - if matches!(cst.check_vote(), VoteResult::Won) { + if ms.check_quorum(cst.votes_received.keys().copied()) { // single node cluster // vote is granted by the majority of servers, can become leader let spec_pools = cst.sps.drain().collect(); diff --git a/crates/curp/src/server/raw_curp/state.rs b/crates/curp/src/server/raw_curp/state.rs index ff9e2a48d..fd71bc190 100644 --- a/crates/curp/src/server/raw_curp/state.rs +++ b/crates/curp/src/server/raw_curp/state.rs @@ -290,28 +290,6 @@ impl CandidateState { votes_received: HashMap::new(), } } - - /// Check if the candidate has won the election - pub(super) fn check_vote(&self) -> VoteResult { - unimplemented!() - } -} - -/// Trait for cluster configuration -trait ClusterConfig { - /// Check if the candidate has won the election - fn check_vote(&self, votes_received: &HashMap) -> VoteResult; -} - -/// Result of a vote -#[derive(Debug, PartialEq)] -pub(super) enum VoteResult { - /// Won the election - Won, - /// Pending - Pending, - /// Lost the election - Lost, } #[cfg(test)] @@ -319,6 +297,6 @@ mod test { #[test] fn check_vote_should_return_right_vote_result() { - unimplemented!() + // unimplement } } From 2d649053d6f92851c44fd8d23d2b6197318ece54 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 14 Aug 2024 21:38:13 
+0800 Subject: [PATCH 156/322] refactor: sp recovery in new membership change Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 9 +++-- crates/curp/src/server/raw_curp/mod.rs | 45 ++++++++++++++++-------- crates/curp/src/server/raw_curp/tests.rs | 2 +- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 20eb05073..aaa98f5ae 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -149,8 +149,13 @@ impl NodeMembershipState { } /// Returns `true` if the given set of nodes forms a quorum - pub(crate) fn check_quorum + Clone>(&self, nodes: I) -> bool { - self.cluster().effective().as_joint().is_quorum(nodes) + pub(crate) fn check_quorum(&self, nodes: I, mut expect_quorum: Q) -> bool + where + I: IntoIterator + Clone, + Q: FnMut(&dyn QuorumSet>, Vec) -> bool, + { + let qs = self.cluster().effective().as_joint(); + expect_quorum(&qs, nodes.into_iter().collect()) } } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index c492abe77..7f8a95dae 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -13,9 +13,11 @@ use std::cmp; use std::cmp::min; use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::collections::HashMap; use std::collections::HashSet; use std::fmt::Debug; +use std::iter; use std::sync::atomic::AtomicU64; use std::sync::atomic::AtomicU8; use std::sync::atomic::Ordering; @@ -71,7 +73,7 @@ use crate::member::NodeMembershipState; use crate::members::ClusterInfo; use crate::members::ServerId; use crate::quorum; -use crate::recover_quorum; +use crate::quorum::QuorumSet; use crate::response::ResponseSender; use crate::role_change::RoleChange; use crate::rpc::connect::InnerConnectApi; @@ -1104,7 +1106,9 @@ impl RawCurp { let ms_r = self.ms.read(); // TODO: implement early return if vote fail is definite - if 
!ms_r.check_quorum(cst_w.votes_received.keys().copied()) { + if !ms_r.check_quorum(cst_w.votes_received.keys().copied(), |qs, ids| { + QuorumSet::is_quorum(qs, ids) + }) { return Ok(false); } @@ -1167,7 +1171,9 @@ impl RawCurp { let ms_r = self.ms.read(); // TODO: implement early return if vote fail is definite - if !ms_r.check_quorum(cst_w.votes_received.keys().copied()) { + if !ms_r.check_quorum(cst_w.votes_received.keys().copied(), |qs, ids| { + QuorumSet::is_quorum(qs, ids) + }) { return Ok(None); } @@ -1733,7 +1739,9 @@ impl RawCurp { debug!("{}'s vote is granted by server {}", self.id(), self.id()); cst.votes_received = HashMap::from([(self.id(), true)]); - if ms.check_quorum(cst.votes_received.keys().copied()) { + if ms.check_quorum(cst.votes_received.keys().copied(), |qs, ids| { + QuorumSet::is_quorum(qs, ids) + }) { self.become_candidate(st, cst, log, ms) } else { Some(Vote { @@ -1777,7 +1785,9 @@ impl RawCurp { cst.votes_received = HashMap::from([(self.id(), true)]); cst.sps = HashMap::from([(self.id(), self_sp)]); - if ms.check_quorum(cst.votes_received.keys().copied()) { + if ms.check_quorum(cst.votes_received.keys().copied(), |qs, ids| { + QuorumSet::is_quorum(qs, ids) + }) { // single node cluster // vote is granted by the majority of servers, can become leader let spec_pools = cst.sps.drain().collect(); @@ -1865,10 +1875,10 @@ impl RawCurp { &self, st: &State, log: &mut Log, - spec_pools: HashMap>>, + spec_pools: BTreeMap>>, ) { if log_enabled!(Level::Debug) { - let debug_sps: HashMap = spec_pools + let debug_sps: BTreeMap = spec_pools .iter() .map(|(id, sp)| { let sp: Vec = sp @@ -1881,19 +1891,24 @@ impl RawCurp { debug!("{} collected spec pools: {debug_sps:?}", self.id()); } - let mut entry_cnt: HashMap, usize)> = HashMap::new(); - for entry in spec_pools.into_values().flatten() { - let entry = entry_cnt.entry(entry.id).or_insert((entry, 0)); - entry.1 += 1; + let mut entry_ids = BTreeMap::, BTreeSet>::new(); + for (entry, id) in spec_pools + 
.into_iter() + .flat_map(|(id, entry)| entry.into_iter().zip(iter::repeat(id))) + { + let ids = entry_ids.entry(entry).or_default(); + let _ignore = ids.insert(id); } + let ms_r = self.ms.read(); // get all possibly executed(fast path) entries let existing_log_ids = log.get_cmd_ids(); - let recovered_cmds = entry_cnt - .into_values() + let recovered_cmds = entry_ids + .into_iter() // only cmds whose cnt >= ( f + 1 ) / 2 + 1 can be recovered - .filter_map(|(cmd, cnt)| { - (cnt >= recover_quorum(self.ctx.cluster_info.voters_len())).then_some(cmd) + .filter_map(|(cmd, ids)| { + ms_r.check_quorum(ids, |qs, i| QuorumSet::is_recover_quorum(qs, i)) + .then_some(cmd) }) // dedup in current logs .filter(|entry| { diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 8663c0519..003227815 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -521,7 +521,7 @@ fn recover_from_spec_pools_will_pick_the_correct_cmds() { let s3_id = curp.cluster().get_id_by_name("S3").unwrap(); let s4_id = curp.cluster().get_id_by_name("S4").unwrap(); - let spec_pools = HashMap::from([ + let spec_pools = BTreeMap::from([ ( s0_id, vec![ From da62cb390bb30d154e8a6c014bf10dc3b6f042e6 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 14 Aug 2024 21:46:57 +0800 Subject: [PATCH 157/322] refactor: log commit condition in new membership change Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/mod.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 7f8a95dae..90fb31a57 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -72,7 +72,6 @@ use crate::member::MembershipInfo; use crate::member::NodeMembershipState; use crate::members::ClusterInfo; use 
crate::members::ServerId; -use crate::quorum; use crate::quorum::QuorumSet; use crate::response::ResponseSender; use crate::role_change::RoleChange; @@ -1335,6 +1334,11 @@ impl RawCurp { self.ctx.cluster_info.self_id() } + /// Get self's node id + pub(super) fn node_id(&self) -> u64 { + self.ms.read().node_id() + } + /// Get a rx for leader changes pub(super) fn leader_rx(&self) -> broadcast::Receiver> { self.ctx.leader_tx.subscribe() @@ -1862,12 +1866,15 @@ impl RawCurp { return false; } - let replicated_cnt = self + let replicated_ids: Vec<_> = self .lst .iter() - .filter(|f| !f.is_learner && f.match_index >= i) - .count(); - replicated_cnt + 1 >= quorum(self.ctx.cluster_info.voters_len()) + .filter_map(|f| (!f.is_learner && f.match_index >= i).then_some(*f.key())) + .chain(iter::once(self.node_id())) + .collect(); + + let ms_r = self.ms.read(); + ms_r.check_quorum(replicated_ids, |qs, ids| QuorumSet::is_quorum(qs, ids)) } /// Recover from all voter's spec pools From c2b45eed683662ecef35621f80c3fd58636a6c8c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 15 Aug 2024 10:39:05 +0800 Subject: [PATCH 158/322] refactor: init leader state using membership info Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/mod.rs | 2 +- crates/curp/src/server/raw_curp/state.rs | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 90fb31a57..be04ceed2 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -194,7 +194,7 @@ impl RawCurpBuilder { args.cfg.follower_timeout_ticks, args.cfg.candidate_timeout_ticks, )); - let lst = LeaderState::new(&args.cluster_info.peers_ids()); + let lst = LeaderState::new(args.membership_info.init_members.keys().copied()); let cst = Mutex::new(CandidateState::new()); let log = 
RwLock::new(Log::new(args.cfg.batch_max_size, args.cfg.log_entries_cap)); diff --git a/crates/curp/src/server/raw_curp/state.rs b/crates/curp/src/server/raw_curp/state.rs index fd71bc190..c26a75edb 100644 --- a/crates/curp/src/server/raw_curp/state.rs +++ b/crates/curp/src/server/raw_curp/state.rs @@ -156,11 +156,14 @@ impl State { impl LeaderState { /// Create a `LeaderState` - pub(super) fn new(others: &[ServerId]) -> Self { + pub(super) fn new(others: I) -> Self + where + I: IntoIterator, + { Self { statuses: others - .iter() - .map(|o| (*o, FollowerStatus::default())) + .into_iter() + .map(|o| (o, FollowerStatus::default())) .collect(), leader_transferee: AtomicU64::new(0), no_op_state: NoOpState::default(), From 9abe8a5ccaa17920b98d849886dbd370390ac2b5 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 15 Aug 2024 11:11:05 +0800 Subject: [PATCH 159/322] refactor: remove old membership change implementation Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/common | 2 +- crates/curp/src/client/connect.rs | 15 +- crates/curp/src/client/keep_alive.rs | 14 +- crates/curp/src/client/retry/mod.rs | 15 +- crates/curp/src/client/unary/mod.rs | 26 +- crates/curp/src/lib.rs | 10 - crates/curp/src/log_entry/entry_data.rs | 11 +- crates/curp/src/members.rs | 34 +- crates/curp/src/rpc/connect/mod.rs | 51 +-- crates/curp/src/rpc/mod.rs | 92 ------ crates/curp/src/rpc/reconnect.rs | 15 +- crates/curp/src/server/cmd_worker/mod.rs | 56 +--- crates/curp/src/server/curp_node/mod.rs | 164 +--------- crates/curp/src/server/mod.rs | 16 - crates/curp/src/server/raw_curp/log.rs | 70 +---- crates/curp/src/server/raw_curp/mod.rs | 360 +--------------------- crates/curp/src/server/raw_curp/state.rs | 45 --- crates/curp/src/server/raw_curp/tests.rs | 12 + crates/curp/tests/it/server.rs | 25 +- crates/xline/src/server/auth_wrapper.rs | 12 +- crates/xline/src/server/cluster_server.rs | 114 +------ 21 files changed, 74 
insertions(+), 1085 deletions(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 56c792d19..1e485f7b5 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 56c792d19852b7ec1943cac48487e7eb0b5d8ef4 +Subproject commit 1e485f7b531b75f24423b8746dc2dee24e83899d diff --git a/crates/curp/src/client/connect.rs b/crates/curp/src/client/connect.rs index d5bb980f3..f3443ddc7 100644 --- a/crates/curp/src/client/connect.rs +++ b/crates/curp/src/client/connect.rs @@ -4,7 +4,7 @@ use tracing::debug; use crate::{ members::ServerId, - rpc::{ConfChange, FetchClusterResponse, Member, ReadState}, + rpc::{FetchClusterResponse, ReadState}, }; use super::retry::Context; @@ -34,12 +34,6 @@ pub trait ClientApi { use_fast_path: bool, ) -> Result, Self::Error>; - /// Send propose configuration changes to the cluster - async fn propose_conf_change( - &self, - changes: Vec, - ) -> Result, Self::Error>; - /// Send propose to shutdown cluster async fn propose_shutdown(&self) -> Result<(), Self::Error>; @@ -110,13 +104,6 @@ pub(crate) trait RepeatableClientApi { ctx: Context, ) -> Result, Self::Error>; - /// Send propose configuration changes to the cluster - async fn propose_conf_change( - &self, - changes: Vec, - ctx: Context, - ) -> Result, Self::Error>; - /// Send propose to shutdown cluster async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error>; diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 467475bc1..aafbb1299 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -157,9 +157,8 @@ mod tests { AddLearnerRequest, AddLearnerResponse, CurpError, FetchClusterRequest, FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, Member, MoveLeaderRequest, - MoveLeaderResponse, OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, - ProposeId, 
ProposeRequest, ProposeResponse, PublishRequest, PublishResponse, - ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, + MoveLeaderResponse, OpResponse, ProposeId, ProposeRequest, ProposeResponse, PublishRequest, + PublishResponse, ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, ResponseOp, ShutdownRequest, ShutdownResponse, SyncedResponse, }; @@ -211,15 +210,6 @@ mod tests { unreachable!("please use MockedConnectApi") } - /// Send `ProposeConfChange` - async fn propose_conf_change( - &self, - _request: ProposeConfChangeRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - /// Send `PublishRequest` async fn publish( &self, diff --git a/crates/curp/src/client/retry/mod.rs b/crates/curp/src/client/retry/mod.rs index c92fb1f4b..d2c985036 100644 --- a/crates/curp/src/client/retry/mod.rs +++ b/crates/curp/src/client/retry/mod.rs @@ -19,8 +19,7 @@ use super::{ }; use crate::{ members::ServerId, - rpc::{connects, ConfChange, CurpError, FetchClusterResponse, Member, ProposeId, ReadState}, - tracker::Tracker, + rpc::{CurpError, FetchClusterResponse, ReadState, Redirect, ProposeId}, tracker::Tracker, }; /// Backoff config @@ -399,18 +398,6 @@ where .await } - /// Send propose configuration changes to the cluster - async fn propose_conf_change( - &self, - changes: Vec, - ) -> Result, tonic::Status> { - self.retry::<_, _>(|client, ctx| { - let changes_c = changes.clone(); - async move { RepeatableClientApi::propose_conf_change(client, changes_c, ctx).await } - }) - .await - } - /// Send propose to shutdown cluster async fn propose_shutdown(&self) -> Result<(), tonic::Status> { self.retry::<_, _>(|client, ctx| async move { diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index ecd938709..bafeb2dc3 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -15,8 +15,8 @@ use super::{ 
use crate::{ members::ServerId, rpc::{ - AddLearnerRequest, ConfChange, CurpError, FetchReadStateRequest, Member, MoveLeaderRequest, - ProposeConfChangeRequest, PublishRequest, ReadState, RemoveLearnerRequest, ShutdownRequest, + AddLearnerRequest, CurpError, FetchReadStateRequest, MoveLeaderRequest, PublishRequest, + ReadState, RemoveLearnerRequest, ShutdownRequest, }, }; @@ -64,28 +64,6 @@ impl RepeatableClientApi for Unary { } } - /// Send propose configuration changes to the cluster - async fn propose_conf_change( - &self, - changes: Vec, - ctx: Context, - ) -> Result, Self::Error> { - let req = ProposeConfChangeRequest::new( - ctx.propose_id(), - changes, - ctx.cluster_state().cluster_version(), - ); - let timeout = self.config.wait_synced_timeout(); - let members = ctx - .cluster_state() - .map_leader(|conn| async move { conn.propose_conf_change(req, timeout).await }) - .await? - .into_inner() - .members; - - Ok(members) - } - /// Send propose to shutdown cluster async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error> { let req = ShutdownRequest::new(ctx.propose_id(), ctx.cluster_state().cluster_version()); diff --git a/crates/curp/src/lib.rs b/crates/curp/src/lib.rs index 58b57b4b0..dc715ebbd 100644 --- a/crates/curp/src/lib.rs +++ b/crates/curp/src/lib.rs @@ -220,15 +220,6 @@ fn quorum(size: usize) -> usize { size / 2 + 1 } -/// Calculate the `recover_quorum`: the smallest number of servers who must -/// contain a command in speculative pool for it to be recovered -#[inline] -#[must_use] -#[allow(clippy::arithmetic_side_effects)] // it's safe -fn recover_quorum(size: usize) -> usize { - quorum(size) / 2 + 1 -} - #[cfg(test)] mod test { use super::*; @@ -249,7 +240,6 @@ mod test { for (node_cnt, expected) in nodes.into_iter().zip(expected_res.into_iter()) { assert_eq!(quorum(node_cnt), expected.0); - assert_eq!(recover_quorum(node_cnt), expected.1); } } } diff --git a/crates/curp/src/log_entry/entry_data.rs 
b/crates/curp/src/log_entry/entry_data.rs index 2b7409ebb..8eed4cfd3 100644 --- a/crates/curp/src/log_entry/entry_data.rs +++ b/crates/curp/src/log_entry/entry_data.rs @@ -3,10 +3,9 @@ use std::sync::Arc; use serde::Deserialize; use serde::Serialize; +use crate::member::Membership; use crate::members::ServerId; -use crate::rpc::ConfChange; use crate::rpc::PublishRequest; -use crate::member::Membership; /// Entry data of a `LogEntry` #[derive(Debug, Clone, Serialize, Deserialize)] @@ -16,8 +15,6 @@ pub(crate) enum EntryData { Empty, /// `Command` entry Command(Arc), - /// `ConfChange` entry - ConfChange(Vec), /// `Shutdown` entry Shutdown, /// `SetNodeState` entry @@ -32,12 +29,6 @@ impl From> for EntryData { } } -impl From> for EntryData { - fn from(value: Vec) -> Self { - Self::ConfChange(value) - } -} - impl From for EntryData { fn from(value: PublishRequest) -> Self { EntryData::SetNodeState(value.node_id, value.name, value.client_urls) diff --git a/crates/curp/src/members.rs b/crates/curp/src/members.rs index 5682268f1..3658cad1d 100644 --- a/crates/curp/src/members.rs +++ b/crates/curp/src/members.rs @@ -1,6 +1,6 @@ use std::{ collections::{hash_map::DefaultHasher, HashMap}, - hash::{Hash, Hasher}, + hash::Hasher, sync::{ atomic::{AtomicU64, Ordering}, Arc, @@ -339,22 +339,6 @@ impl ClusterInfo { self.cluster_version.load(Ordering::Relaxed) } - /// cluster version decrease - pub(crate) fn cluster_version_update(&self) { - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - self.all_members_peer_urls() - .into_iter() - .sorted() - .for_each(|(id, mut addrs)| { - id.hash(&mut hasher); - addrs.sort(); - addrs.hash(&mut hasher); - }); - let ver = hasher.finish(); - info!("cluster version updates to {ver}"); - self.cluster_version.store(ver, Ordering::Relaxed); - } - /// Get peers #[must_use] #[inline] @@ -393,22 +377,6 @@ impl ClusterInfo { .find_map(|m| (m.name == name).then_some(m.id)) } - /// Promote a learner to voter - pub(crate) fn 
promote(&self, node_id: ServerId) -> bool { - if let Some(mut s) = self.members.get_mut(&node_id) { - s.is_learner = false; - return true; - } - false - } - - /// Demote a voter to learner - pub(crate) fn demote(&self, node_id: ServerId) { - if let Some(mut s) = self.members.get_mut(&node_id) { - s.is_learner = true; - } - } - /// Check if cluster contains a node pub(crate) fn contains(&self, node_id: ServerId) -> bool { self.members.contains_key(&node_id) diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index 4cc5aeb2a..665bcea64 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -40,9 +40,9 @@ use crate::{ AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchClusterRequest, FetchClusterResponse, FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, - MoveLeaderResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeRequest, - Protocol, PublishRequest, PublishResponse, ShutdownRequest, ShutdownResponse, - TriggerShutdownRequest, TryBecomeLeaderNowRequest, VoteRequest, VoteResponse, + MoveLeaderResponse, ProposeRequest, Protocol, PublishRequest, PublishResponse, + ShutdownRequest, ShutdownResponse, TriggerShutdownRequest, TryBecomeLeaderNowRequest, + VoteRequest, VoteResponse, }, server::StreamingProtocol, snapshot::Snapshot, @@ -193,13 +193,6 @@ pub(crate) trait ConnectApi: Send + Sync + 'static { timeout: Duration, ) -> Result, CurpError>; - /// Send `ProposeRequest` - async fn propose_conf_change( - &self, - request: ProposeConfChangeRequest, - timeout: Duration, - ) -> Result, CurpError>; - /// Send `PublishRequest` async fn publish( &self, @@ -309,16 +302,6 @@ impl InnerConnectApiWrapper { pub(crate) fn new_from_arc(connect: Arc) -> Self { Self(connect) } - - /// Create a new `InnerConnectApiWrapper` from id and addrs - pub(crate) fn connect( - id: ServerId, - addrs: Vec, - 
tls_config: Option, - ) -> Self { - let conn = connect_to::>(id, addrs, tls_config); - InnerConnectApiWrapper::new_from_arc(Arc::new(conn)) - } } impl Debug for InnerConnectApiWrapper { @@ -489,19 +472,6 @@ impl ConnectApi for Connect> { with_timeout!(timeout, client.shutdown(req)).map_err(Into::into) } - /// Send `ProposeRequest` - #[instrument(skip(self), name = "client propose conf change")] - async fn propose_conf_change( - &self, - request: ProposeConfChangeRequest, - timeout: Duration, - ) -> Result, CurpError> { - let mut client = self.rpc_connect.clone(); - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_current(); - with_timeout!(timeout, client.propose_conf_change(req)).map_err(Into::into) - } - /// Send `PublishRequest` #[instrument(skip(self), name = "client publish")] async fn publish( @@ -813,21 +783,6 @@ where self.server.publish(req).await.map_err(Into::into) } - /// Send `ProposeRequest` - async fn propose_conf_change( - &self, - request: ProposeConfChangeRequest, - _timeout: Duration, - ) -> Result, CurpError> { - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_bypassed(); - req.metadata_mut().inject_current(); - self.server - .propose_conf_change(req) - .await - .map_err(Into::into) - } - /// Send `ShutdownRequest` async fn shutdown( &self, diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 004a71a28..684bfa99d 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -24,7 +24,6 @@ pub use self::proto::{ curp_error::Redirect, fetch_read_state_response::{IdSet, ReadState}, op_response::Op as ResponseOp, - propose_conf_change_request::{ConfChange, ConfChangeType}, protocol_client, protocol_server::{Protocol, ProtocolServer}, AddLearnerRequest, @@ -45,8 +44,6 @@ pub use self::proto::{ Node, OpResponse, OptionalU64, - ProposeConfChangeRequest, - ProposeConfChangeResponse, ProposeId as PbProposeId, ProposeRequest, ProposeResponse, @@ -472,95 +469,6 @@ impl 
FetchReadStateResponse { } } -#[allow(clippy::as_conversions)] // ConfChangeType is so small that it won't exceed the range of i32 type. -impl ConfChange { - /// Create a new `ConfChange` to add a node - #[must_use] - #[inline] - pub fn add(node_id: ServerId, address: Vec) -> Self { - Self { - change_type: ConfChangeType::Add as i32, - node_id, - address, - } - } - - /// Create a new `ConfChange` to remove a node - #[must_use] - #[inline] - pub fn remove(node_id: ServerId) -> Self { - Self { - change_type: ConfChangeType::Remove as i32, - node_id, - address: vec![], - } - } - - /// Create a new `ConfChange` to update a node - #[must_use] - #[inline] - pub fn update(node_id: ServerId, address: Vec) -> Self { - Self { - change_type: ConfChangeType::Update as i32, - node_id, - address, - } - } - - /// Create a new `ConfChange` to add a learner node - #[must_use] - #[inline] - pub fn add_learner(node_id: ServerId, address: Vec) -> Self { - Self { - change_type: ConfChangeType::AddLearner as i32, - node_id, - address, - } - } - - /// Create a new `ConfChange` to promote a learner node - #[must_use] - #[inline] - pub fn promote_learner(node_id: ServerId) -> Self { - Self { - change_type: ConfChangeType::Promote as i32, - node_id, - address: vec![], - } - } - - /// Create a new `ConfChange` to promote a node - #[must_use] - #[inline] - pub fn promote(node_id: ServerId) -> Self { - Self { - change_type: ConfChangeType::Promote as i32, - node_id, - address: vec![], - } - } -} - -impl ProposeConfChangeRequest { - /// Create a new `ProposeConfChangeRequest` - pub(crate) fn new(id: ProposeId, changes: Vec, cluster_version: u64) -> Self { - Self { - propose_id: Some(id.into()), - changes, - cluster_version, - } - } - - /// Get id of the request - pub(crate) fn propose_id(&self) -> ProposeId { - self.propose_id - .unwrap_or_else(|| { - unreachable!("propose id should be set in propose conf change request") - }) - .into() - } -} - impl ShutdownRequest { /// Create a new shutdown 
request pub(crate) fn new(id: ProposeId, cluster_version: u64) -> Self { diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs index a51350aa6..18c2346fc 100644 --- a/crates/curp/src/rpc/reconnect.rs +++ b/crates/curp/src/rpc/reconnect.rs @@ -10,9 +10,9 @@ use crate::{ connect::ConnectApi, AddLearnerRequest, AddLearnerResponse, CurpError, FetchClusterRequest, FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, - OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeRequest, - PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, RecordResponse, - RemoveLearnerRequest, RemoveLearnerResponse, ShutdownRequest, ShutdownResponse, + OpResponse, ProposeRequest, PublishRequest, PublishResponse, ReadIndexResponse, + RecordRequest, RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, + ShutdownRequest, ShutdownResponse, }, }; @@ -119,15 +119,6 @@ impl ConnectApi for Reconnect { execute_with_reconnect!(self, ConnectApi::read_index, timeout) } - /// Send `ProposeRequest` - async fn propose_conf_change( - &self, - request: ProposeConfChangeRequest, - timeout: Duration, - ) -> Result, CurpError> { - execute_with_reconnect!(self, ConnectApi::propose_conf_change, request, timeout) - } - /// Send `PublishRequest` async fn publish( &self, diff --git a/crates/curp/src/server/cmd_worker/mod.rs b/crates/curp/src/server/cmd_worker/mod.rs index 2b0f43965..619b44194 100644 --- a/crates/curp/src/server/cmd_worker/mod.rs +++ b/crates/curp/src/server/cmd_worker/mod.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use curp_external_api::cmd::{AfterSyncCmd, AfterSyncOk}; use tokio::sync::oneshot; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, info}; use super::{curp_node::AfterSyncEntry, raw_curp::RawCurp}; use crate::{ @@ -13,7 +13,7 @@ use crate::{ log_entry::{EntryData, LogEntry}, response::ResponseSender, 
role_change::RoleChange, - rpc::{ConfChangeType, PoolEntry, ProposeId, ProposeResponse, SyncedResponse}, + rpc::{PoolEntry, ProposeId, ProposeResponse, SyncedResponse}, snapshot::{Snapshot, SnapshotMeta}, }; @@ -145,7 +145,7 @@ where } /// After sync entries other than cmd -async fn after_sync_others, RC: RoleChange>( +fn after_sync_others, RC: RoleChange>( others: Vec>, ce: &CE, curp: &RawCurp, @@ -165,54 +165,6 @@ async fn after_sync_others, RC: RoleChange>( } cb.write().notify_shutdown(); } - (EntryData::ConfChange(ref conf_change), _) => { - if let Err(e) = ce.set_last_applied(entry.index) { - error!("failed to set last_applied, {e}"); - return; - } - let change = conf_change.first().unwrap_or_else(|| { - unreachable!("conf change should always have at least one change") - }); - let shutdown_self = - change.change_type() == ConfChangeType::Remove && change.node_id == id; - cb.write().insert_conf(entry.propose_id); - remove_from_sp_ucp(curp, Some(&entry)); - if shutdown_self { - if let Some(maybe_new_leader) = curp.pick_new_leader() { - info!( - "the old leader {} will shutdown, try to move leadership to {}", - id, maybe_new_leader - ); - if curp - .handle_move_leader(maybe_new_leader) - .unwrap_or_default() - { - if let Err(e) = curp - .connects() - .get(&maybe_new_leader) - .unwrap_or_else(|| { - unreachable!("connect to {} should exist", maybe_new_leader) - }) - .try_become_leader_now(curp.cfg().wait_synced_timeout) - .await - { - warn!( - "{} send try become leader now to {} failed: {:?}", - curp.id(), - maybe_new_leader, - e - ); - }; - } - } else { - info!( - "the old leader {} will shutdown, but no other node can be the leader now", - id - ); - } - curp.task_manager().shutdown(false).await; - } - } (EntryData::SetNodeState(node_id, ref name, ref client_urls), _) => { info!("setting node state: {node_id}, urls: {:?}", client_urls); if let Err(e) = ce.set_last_applied(entry.index) { @@ -245,7 +197,7 @@ pub(super) async fn after_sync, RC: RoleChang 
.into_iter() .partition(|(entry, _)| matches!(entry.entry_data, EntryData::Command(_))); after_sync_cmds(&cmd_entries, ce, curp); - after_sync_others(others, ce, curp).await; + after_sync_others(others, ce, curp); } /// Cmd worker reset handler diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index f4b2aca69..4628ddc9e 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -47,15 +47,14 @@ use crate::{ rpc::{ self, connect::{InnerConnectApi, InnerConnectApiWrapper}, - AppendEntriesRequest, AppendEntriesResponse, ConfChange, ConfChangeType, CurpError, - FetchClusterRequest, FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, + AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchClusterRequest, + FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, Node, - PoolEntry, ProposeConfChangeRequest, ProposeConfChangeResponse, ProposeId, ProposeRequest, - ProposeResponse, PublishRequest, PublishResponse, QuorumSet, ReadIndexResponse, - RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, SyncedResponse, - TriggerShutdownRequest, TriggerShutdownResponse, TryBecomeLeaderNowRequest, - TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, + PoolEntry, ProposeId, ProposeRequest, ProposeResponse, PublishRequest, PublishResponse, + QuorumSet, ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, + ShutdownResponse, SyncedResponse, TriggerShutdownRequest, TriggerShutdownResponse, + TryBecomeLeaderNowRequest, TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, }, server::{ cmd_worker::{after_sync, worker_reset, worker_snapshot}, @@ -348,23 +347,6 @@ impl, RC: RoleChange> CurpNode { Ok(ShutdownResponse::default()) } - /// Handle `ProposeConfChange` requests - 
pub(super) async fn propose_conf_change( - &self, - req: ProposeConfChangeRequest, - bypassed: bool, - ) -> Result { - self.check_cluster_version(req.cluster_version)?; - let id = req.propose_id(); - if bypassed { - self.curp.mark_client_id_bypassed(id.0); - } - self.curp.handle_propose_conf_change(id, req.changes)?; - CommandBoard::wait_for_conf(&self.cmd_board, id).await; - let members = self.curp.cluster().all_members_vec(); - Ok(ProposeConfChangeResponse { members }) - } - /// Handle `Publish` requests pub(super) fn publish( &self, @@ -715,72 +697,6 @@ impl, RC: RoleChange> CurpNode { } } - /// Handler of conf change - async fn conf_change_handler( - curp: Arc>, - mut remove_events: HashMap>, - shutdown_listener: Listener, - ) { - let task_manager = curp.task_manager(); - let change_rx = curp.change_rx(); - #[allow(clippy::arithmetic_side_effects, clippy::ignored_unit_patterns)] - // introduced by tokio select - loop { - let change: ConfChange = tokio::select! { - _ = shutdown_listener.wait() => break, - change_res = change_rx.recv_async() => { - let Ok(change) = change_res else { - break; - }; - change - }, - }; - match change.change_type() { - ConfChangeType::Add | ConfChangeType::AddLearner => { - let connect = InnerConnectApiWrapper::connect( - change.node_id, - change.address, - curp.client_tls_config().cloned(), - ); - curp.insert_connect(connect.clone()); - let sync_event = curp.sync_event(change.node_id); - let remove_event = Arc::new(Event::new()); - - task_manager.spawn(TaskName::SyncFollower, |n| { - Self::sync_follower_task( - Arc::clone(&curp), - connect, - sync_event, - Arc::clone(&remove_event), - n, - ) - }); - _ = remove_events.insert(change.node_id, remove_event); - } - ConfChangeType::Remove => { - if change.node_id == curp.id() { - break; - } - let Some(event) = remove_events.remove(&change.node_id) else { - unreachable!( - "({:?}) shutdown_event of removed follower ({:x}) should exist", - curp.id(), - change.node_id - ); - }; - let _ignore 
= event.notify(1); - } - ConfChangeType::Update => { - if let Err(e) = curp.update_connect(change.node_id, change.address).await { - error!("update connect {} failed, err {:?}", change.node_id, e); - continue; - } - } - ConfChangeType::Promote => {} - } - } - } - /// This task will keep a follower up-to-data when current node is leader, /// and it will wait for `leader_event` if current node is not leader #[allow(clippy::arithmetic_side_effects, clippy::ignored_unit_patterns)] // tokio select internal triggered @@ -1039,9 +955,6 @@ impl, RC: RoleChange> CurpNode { _ = remove_events.insert(c.id(), remove_event); } - task_manager.spawn(TaskName::ConfChange, |n| { - Self::conf_change_handler(Arc::clone(&curp), remove_events, n) - }); task_manager.spawn(TaskName::HandlePropose, |_n| { Self::handle_propose_task(Arc::clone(&cmd_executor), Arc::clone(&curp), propose_rx) }); @@ -1326,7 +1239,7 @@ mod tests { use tracing_test::traced_test; use super::*; - use crate::rpc::{connect::MockInnerConnectApi, ConfChange}; + use crate::rpc::connect::MockInnerConnectApi; #[traced_test] #[tokio::test] @@ -1406,67 +1319,8 @@ mod tests { task_manager.shutdown(true).await; } + #[cfg(ignore)] #[traced_test] #[tokio::test] - async fn vote_will_not_send_to_learner_during_election() { - let task_manager = Arc::new(TaskManager::new()); - let curp = { - Arc::new(RawCurp::new_test( - 3, - mock_role_change(), - Arc::clone(&task_manager), - )) - }; - - let learner_id = 123; - let s1_id = curp.cluster().get_id_by_name("S1").unwrap(); - let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); - - let _ig = curp.apply_conf_change(vec![ConfChange::add_learner( - learner_id, - vec!["address".to_owned()], - )]); - - curp.handle_append_entries(1, s2_id, 0, 0, vec![], 0, |_, _, _| {}) - .unwrap(); - - let mut mock_connect1 = MockInnerConnectApi::default(); - mock_connect1.expect_vote().returning(|req, _| { - Ok(tonic::Response::new( - VoteResponse::new_accept::(req.term, vec![]).unwrap(), - )) - }); - 
mock_connect1.expect_id().return_const(s1_id); - curp.set_connect( - s1_id, - InnerConnectApiWrapper::new_from_arc(Arc::new(mock_connect1)), - ); - - let mut mock_connect2 = MockInnerConnectApi::default(); - mock_connect2.expect_vote().returning(|req, _| { - Ok(tonic::Response::new( - VoteResponse::new_accept::(req.term, vec![]).unwrap(), - )) - }); - mock_connect2.expect_id().return_const(s2_id); - curp.set_connect( - s2_id, - InnerConnectApiWrapper::new_from_arc(Arc::new(mock_connect2)), - ); - - let mut mock_connect_learner = MockInnerConnectApi::default(); - mock_connect_learner - .expect_vote() - .returning(|_, _| panic!("should not send vote to learner")); - curp.set_connect( - learner_id, - InnerConnectApiWrapper::new_from_arc(Arc::new(mock_connect_learner)), - ); - task_manager.spawn(TaskName::Election, |n| { - CurpNode::<_, TestCE, _>::election_task(Arc::clone(&curp), n) - }); - sleep_secs(3).await; - assert!(curp.is_leader()); - task_manager.shutdown(true).await; - } + async fn vote_will_not_send_to_learner_during_election() {} } diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 89998aba0..78d4a6cfa 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -42,8 +42,6 @@ use crate::rpc::LeaseKeepAliveMsg; use crate::rpc::MoveLeaderRequest; use crate::rpc::MoveLeaderResponse; use crate::rpc::OpResponse; -use crate::rpc::ProposeConfChangeRequest; -use crate::rpc::ProposeConfChangeResponse; use crate::rpc::ProposeRequest; use crate::rpc::PublishRequest; use crate::rpc::PublishResponse; @@ -161,20 +159,6 @@ impl, RC: RoleChange> crate::rpc::Protocol fo )) } - #[instrument(skip_all, name = "curp_propose_conf_change")] - async fn propose_conf_change( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - let bypassed = request.metadata().is_bypassed(); - request.metadata().extract_span(); - Ok(tonic::Response::new( - self.inner - .propose_conf_change(request.into_inner(), bypassed) - .await?, 
- )) - } - #[instrument(skip_all, name = "curp_publish")] async fn publish( &self, diff --git a/crates/curp/src/server/raw_curp/log.rs b/crates/curp/src/server/raw_curp/log.rs index 81062df3e..9ddef76be 100644 --- a/crates/curp/src/server/raw_curp/log.rs +++ b/crates/curp/src/server/raw_curp/log.rs @@ -2,11 +2,10 @@ use std::{ cmp::{min, Ordering}, - collections::{HashMap, HashSet, VecDeque}, + collections::{HashSet, VecDeque}, fmt::Debug, ops::{Bound, Range, RangeBounds, RangeInclusive}, sync::Arc, - vec, }; use clippy_utilities::NumericCast; @@ -17,7 +16,6 @@ use crate::{ cmd::Command, log_entry::{EntryData, LogEntry}, rpc::ProposeId, - server::metrics, snapshot::SnapshotMeta, LogIndex, }; @@ -116,41 +114,10 @@ pub(super) struct Log { pub(super) last_as: LogIndex, /// Index of highest log entry sent to speculatively exe. `last_exe` should always be greater than or equal to `last_as`. pub(super) last_exe: LogIndex, - /// Contexts of fallback log entries - pub(super) fallback_contexts: HashMap>, /// Entries to keep in memory entries_cap: usize, } -/// Context of fallback conf change entry -pub(super) struct FallbackContext { - /// The origin entry - pub(super) origin_entry: Arc>, - /// The addresses of the old config - pub(super) addrs: Vec, - /// The name of the old config - pub(super) name: String, - /// Whether the old config is a learner - pub(super) is_learner: bool, -} - -impl FallbackContext { - /// Create a new fallback context - pub(super) fn new( - origin_entry: Arc>, - addrs: Vec, - name: String, - is_learner: bool, - ) -> Self { - Self { - origin_entry, - addrs, - name, - is_learner, - } - } -} - impl Log { /// Shortens the log entries, keeping the first `len` elements and dropping /// the rest. 
@@ -310,18 +277,8 @@ impl Debug for Log { } } -/// Conf change entries type -type ConfChangeEntries = Vec>>; -/// Fallback indexes type -type FallbackIndexes = HashSet; - /// Type retruned when append success -type AppendSuccess = ( - Vec>>, - ConfChangeEntries, - FallbackIndexes, - LogIndex, -); +type AppendSuccess = (Vec>>, LogIndex); impl Log { /// Create a new log @@ -337,7 +294,6 @@ impl Log { base_term: 0, last_as: 0, last_exe: 0, - fallback_contexts: HashMap::new(), entries_cap, } } @@ -383,8 +339,6 @@ impl Log { prev_log_term: u64, ) -> Result, Vec>> { let mut to_persist = Vec::with_capacity(entries.len()); - let mut conf_changes = vec![]; - let mut need_fallback_indexes = HashSet::new(); // check if entries can be appended if self.get(prev_log_index).map_or_else( || (self.base_index, self.base_term) != (prev_log_index, prev_log_term), @@ -405,12 +359,6 @@ impl Log { } pi += 1; } - // Record entries that need to be fallback in the truncated entries - for e in self.entries.range(pi..) { - if matches!(e.inner.entry_data, EntryData::ConfChange(_)) { - let _ig = need_fallback_indexes.insert(e.inner.index); - } - } // Truncate entries self.truncate(pi); let truncate_at = self @@ -423,9 +371,6 @@ impl Log { .skip(pi - self.li_to_pi(prev_log_index + 1)) .map(Arc::new) { - if matches!(entry.entry_data, EntryData::ConfChange(_)) { - conf_changes.push(Arc::clone(&entry)); - } #[allow(clippy::expect_used)] // It's safe to expect here. 
self.push_back( Arc::clone(&entry), @@ -435,7 +380,7 @@ impl Log { to_persist.push(entry); } - Ok((to_persist, conf_changes, need_fallback_indexes, truncate_at)) + Ok((to_persist, truncate_at)) } /// Check if the candidate's log is up-to-date @@ -569,15 +514,6 @@ impl Log { self.commit_index ); self.commit_index = commit_index; - self.fallback_contexts.retain(|&idx, c| { - if idx > self.commit_index { - return true; - } - if c.is_learner { - metrics::get().learner_promote_succeed.add(1, &[]); - } - false - }); } #[cfg(test)] diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index be04ceed2..1c563ec28 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -15,10 +15,8 @@ use std::cmp::min; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; -use std::collections::HashSet; use std::fmt::Debug; use std::iter; -use std::sync::atomic::AtomicU64; use std::sync::atomic::AtomicU8; use std::sync::atomic::Ordering; use std::sync::Arc; @@ -77,11 +75,8 @@ use crate::response::ResponseSender; use crate::role_change::RoleChange; use crate::rpc::connect::InnerConnectApi; use crate::rpc::connect::InnerConnectApiWrapper; -use crate::rpc::ConfChange; -use crate::rpc::ConfChangeType; use crate::rpc::CurpError; use crate::rpc::IdSet; -use crate::rpc::Member; use crate::rpc::PoolEntry; use crate::rpc::ProposeId; use crate::rpc::PublishRequest; @@ -89,7 +84,6 @@ use crate::rpc::ReadState; use crate::rpc::Redirect; use crate::server::cmd_board::CmdBoardRef; use crate::server::metrics; -use crate::server::raw_curp::log::FallbackContext; use crate::snapshot::Snapshot; use crate::snapshot::SnapshotMeta; use crate::LogIndex; @@ -107,12 +101,6 @@ mod tests; /// Membership implementation mod member_impl; -/// Default Size of channel -const CHANGE_CHANNEL_SIZE: usize = 128; - -/// Max gap between leader and learner when promoting a learner -const MAX_PROMOTE_GAP: 
u64 = 500; - /// The curp state machine pub struct RawCurp { /// Curp state @@ -368,17 +356,8 @@ struct Context { leader_event: Arc, /// Leader change callback role_change: RC, - /// Conf change tx, used to update sync tasks - #[builder(setter(skip))] - change_tx: flume::Sender, - /// Conf change rx, used to update sync tasks - #[builder(setter(skip))] - change_rx: flume::Receiver, /// Connects of peers connects: DashMap, - /// last conf change idx - #[builder(setter(skip))] - last_conf_change_idx: AtomicU64, /// Curp storage curp_storage: Arc>, /// Speculative pool @@ -404,7 +383,6 @@ impl Context { impl ContextBuilder { /// Build the context from the builder pub(super) fn build(&mut self) -> Result, ContextBuilderError> { - let (change_tx, change_rx) = flume::bounded(CHANGE_CHANNEL_SIZE); Ok(Context { cluster_info: match self.cluster_info.take() { Some(value) => value, @@ -437,13 +415,10 @@ impl ContextBuilder { Some(value) => value, None => return Err(ContextBuilderError::UninitializedField("role_change")), }, - change_tx, - change_rx, connects: match self.connects.take() { Some(value) => value, None => return Err(ContextBuilderError::UninitializedField("connects")), }, - last_conf_change_idx: AtomicU64::new(0), curp_storage: match self.curp_storage.take() { Some(value) => value, None => return Err(ContextBuilderError::UninitializedField("curp_storage")), @@ -706,50 +681,6 @@ impl RawCurp { Ok(()) } - /// Handle `propose_conf_change` request - pub(super) fn handle_propose_conf_change( - &self, - propose_id: ProposeId, - conf_changes: Vec, - ) -> Result<(), CurpError> { - debug!("{} gets conf change for with id {}", self.id(), propose_id); - let st_r = self.st.read(); - - // Non-leader doesn't need to sync or execute - if st_r.role != Role::Leader { - return Err(CurpError::redirect(st_r.leader_id, st_r.term)); - } - - if self.lst.get_transferee().is_some() { - metrics::get() - .proposals_failed - .add(1, &[KeyValue::new("reason", "leader transferring")]); - 
return Err(CurpError::LeaderTransfer("leader transferring".to_owned())); - } - self.check_new_config(&conf_changes)?; - - self.deduplicate(propose_id, None)?; - let mut log_w = self.log.write(); - let entry = log_w.push(st_r.term, propose_id, conf_changes.clone()); - debug!("{} gets new log[{}]", self.id(), entry.index); - let apply_opt = self.apply_conf_change(conf_changes); - self.ctx - .last_conf_change_idx - .store(entry.index, Ordering::Release); - if let Some((addrs, name, is_learner)) = apply_opt { - let _ig = log_w.fallback_contexts.insert( - entry.index, - FallbackContext::new(Arc::clone(&entry), addrs, name, is_learner), - ); - } - self.entry_process_single(&mut log_w, &entry, false, st_r.term); - - let log_r = RwLockWriteGuard::downgrade(log_w); - self.persistent_log_entries(&[entry.as_ref()], &log_r); - - Ok(()) - } - /// Handle `publish` request pub(super) fn handle_publish(&self, req: PublishRequest) -> Result<(), CurpError> { debug!( @@ -848,34 +779,10 @@ impl RawCurp { // append log entries let mut log_w = self.log.write(); - let (to_persist, cc_entries, fallback_indexes, truncate_at) = log_w + let (to_persist, truncate_at) = log_w .try_append_entries(entries.clone(), prev_log_index, prev_log_term) .map_err(|_ig| (term, log_w.commit_index + 1))?; self.append_membership(&entries, truncate_at, leader_commit, spawn_sync); - // fallback overwritten conf change entries - for idx in fallback_indexes.iter().sorted().rev() { - let info = log_w.fallback_contexts.remove(idx).unwrap_or_else(|| { - unreachable!("fall_back_infos should contain the entry need to fallback") - }); - let EntryData::ConfChange(ref conf_change) = info.origin_entry.entry_data else { - unreachable!("the entry in the fallback_info should be conf change entry"); - }; - let changes = conf_change.clone(); - self.fallback_conf_change(changes, info.addrs, info.name, info.is_learner); - } - // apply conf change entries - for e in cc_entries { - let EntryData::ConfChange(ref cc) = e.entry_data 
else { - unreachable!("cc_entry should be conf change entry"); - }; - let Some((addrs, name, is_learner)) = self.apply_conf_change(cc.clone()) else { - continue; - }; - let _ig = log_w.fallback_contexts.insert( - e.index, - FallbackContext::new(Arc::clone(&e), addrs, name, is_learner), - ); - } // update commit index let prev_commit_index = log_w.commit_index; log_w.commit_index = min(leader_commit, log_w.last_log_index()); @@ -1022,23 +929,8 @@ impl RawCurp { let st_r = self.st.read(); let log_r = self.log.read(); let contains_candidate = self.cluster().contains(candidate_id); - let remove_candidate_is_not_committed = - log_r - .fallback_contexts - .iter() - .any(|(_, ctx)| match ctx.origin_entry.entry_data { - EntryData::ConfChange(ref cc) => cc.iter().any(|c| { - matches!(c.change_type(), ConfChangeType::Remove) - && c.node_id == candidate_id - }), - EntryData::Empty - | EntryData::Command(_) - | EntryData::Shutdown - | EntryData::SetNodeState(_, _, _) - | EntryData::Member(_) => false, - }); // extra check to shutdown removed node - if !contains_candidate && !remove_candidate_is_not_committed { + if !contains_candidate { debug!( "{} received pre vote from removed node {}", self.id(), @@ -1486,149 +1378,6 @@ impl RawCurp { .is_some_and(|match_index| match_index == leader_commit_index) } - /// Check if the new config is valid - pub(super) fn check_new_config(&self, changes: &[ConfChange]) -> Result<(), CurpError> { - assert_eq!(changes.len(), 1, "Joint consensus is not supported yet"); - let Some(conf_change) = changes.iter().next() else { - unreachable!("conf change is empty"); - }; - let mut statuses_ids = self - .lst - .get_all_statuses() - .keys() - .copied() - .chain([self.id()]) - .collect::>(); - let node_id = conf_change.node_id; - match conf_change.change_type() { - ConfChangeType::Add | ConfChangeType::AddLearner => { - if !statuses_ids.insert(node_id) { - return Err(CurpError::node_already_exists()); - } - } - ConfChangeType::Remove => { - if 
!statuses_ids.remove(&node_id) { - return Err(CurpError::node_not_exist()); - } - } - ConfChangeType::Update => { - if statuses_ids.get(&node_id).is_none() { - return Err(CurpError::node_not_exist()); - } - } - ConfChangeType::Promote => { - if statuses_ids.get(&node_id).is_none() { - metrics::get() - .learner_promote_failed - .add(1, &[KeyValue::new("reason", "learner not exist")]); - return Err(CurpError::node_not_exist()); - } - let learner_index = self - .lst - .get_match_index(node_id) - .unwrap_or_else(|| unreachable!("learner should exist here")); - let leader_index = self.log.read().last_log_index(); - if leader_index.overflow_sub(learner_index) > MAX_PROMOTE_GAP { - metrics::get() - .learner_promote_failed - .add(1, &[KeyValue::new("reason", "learner not catch up")]); - return Err(CurpError::learner_not_catch_up()); - } - } - } - - Ok(()) - } - - /// Apply conf changes and return true if self node is removed - pub(super) fn apply_conf_change( - &self, - changes: Vec, - ) -> Option<(Vec, String, bool)> { - assert_eq!(changes.len(), 1, "Joint consensus is not supported yet"); - let Some(conf_change) = changes.into_iter().next() else { - unreachable!("conf change is empty"); - }; - debug!("{} applies conf change {:?}", self.id(), conf_change); - self.switch_config(conf_change) - } - - /// Fallback conf change - pub(super) fn fallback_conf_change( - &self, - changes: Vec, - old_addrs: Vec, - name: String, - is_learner: bool, - ) { - assert_eq!(changes.len(), 1, "Joint consensus is not supported yet"); - if is_learner { - metrics::get().learner_promote_failed.add( - 1, - &[KeyValue::new( - "reason", - "configuration revert by new leader", - )], - ); - } - let Some(conf_change) = changes.into_iter().next() else { - unreachable!("conf change is empty"); - }; - let node_id = conf_change.node_id; - #[allow(clippy::explicit_auto_deref)] // Avoid compiler complaint about `Dashmap::Ref` type - let fallback_change = match conf_change.change_type() { - 
ConfChangeType::Add | ConfChangeType::AddLearner => { - self.lst.remove(node_id); - _ = self.ctx.sync_events.remove(&node_id); - let _ig1 = self.ctx.cluster_info.remove(&node_id); - let _ig2 = self.ctx.curp_storage.remove_member(node_id); - _ = self.ctx.connects.remove(&node_id); - Some(ConfChange::remove(node_id)) - } - ConfChangeType::Remove => { - let member = Member::new(node_id, name, old_addrs.clone(), [], is_learner); - self.lst.insert(node_id, is_learner); - _ = self.ctx.sync_events.insert(node_id, Arc::new(Event::new())); - let _ig1 = self.ctx.curp_storage.put_member(&member); - let _ig2 = self.ctx.cluster_info.insert(member); - if is_learner { - Some(ConfChange::add_learner(node_id, old_addrs)) - } else { - Some(ConfChange::add(node_id, old_addrs)) - } - } - ConfChangeType::Update => { - _ = self.ctx.cluster_info.update(&node_id, old_addrs.clone()); - let m = self.ctx.cluster_info.get(&node_id).unwrap_or_else(|| { - unreachable!("node {} should exist in cluster info", node_id) - }); - let _ig = self.ctx.curp_storage.put_member(&*m); - Some(ConfChange::update(node_id, old_addrs)) - } - ConfChangeType::Promote => { - self.ctx.cluster_info.demote(node_id); - self.lst.demote(node_id); - let m = self.ctx.cluster_info.get(&node_id).unwrap_or_else(|| { - unreachable!("node {} should exist in cluster info", node_id) - }); - let _ig = self.ctx.curp_storage.put_member(&*m); - None - } - }; - self.ctx.cluster_info.cluster_version_update(); - if let Some(c) = fallback_change { - self.ctx - .change_tx - .send(c) - .unwrap_or_else(|_e| unreachable!("change_rx should not be dropped")); - } - } - - /// Get a receiver for conf changes - pub(super) fn change_rx(&self) -> flume::Receiver { - self.ctx.change_rx.clone() - } - /// Get all connects pub(super) fn connects(&self) -> &DashMap { &self.ctx.connects @@ -1642,23 +1391,6 @@ impl RawCurp { op(self.ms.read().connects()) } - /// Insert connect - pub(super) fn insert_connect(&self, connect: InnerConnectApiWrapper) { - let 
_ig = self.ctx.connects.insert(connect.id(), connect); - } - - /// Update connect - pub(super) async fn update_connect( - &self, - id: ServerId, - addrs: Vec, - ) -> Result<(), CurpError> { - match self.ctx.connects.get(&id) { - Some(connect) => Ok(connect.update_addrs(addrs).await?), - None => Ok(()), - } - } - /// Get voters connects pub(super) fn voters_connects(&self) -> BTreeMap> { let ms_r = self.ms.read(); @@ -1685,17 +1417,6 @@ impl RawCurp { self.log.read().last_as } - /// Pick a node that has the same log as the current node - pub(super) fn pick_new_leader(&self) -> Option { - let last_idx = self.log.read().last_log_index(); - for (id, status) in self.lst.get_all_statuses() { - if status.match_index == last_idx && !status.is_learner { - return Some(id); - } - } - None - } - /// Mark a client id as bypassed pub(super) fn mark_client_id_bypassed(&self, client_id: u64) { let mut lm_w = self.ctx.lm.write(); @@ -1957,8 +1678,7 @@ impl RawCurp { EntryData::Command(ref cmd) => { let _ignore = ucp_l.insert(&PoolEntry::new(propose_id, Arc::clone(cmd))); } - EntryData::ConfChange(_) - | EntryData::Shutdown + EntryData::Shutdown | EntryData::Empty | EntryData::SetNodeState(_, _, _) | EntryData::Member(_) => {} @@ -2004,80 +1724,6 @@ impl RawCurp { self.lst.reset_no_op_state(); } - /// Switch to a new config and return old member infos for fallback - /// - /// FIXME: The state of `ctx.cluster_info` might be inconsistent with the - /// log. 
A potential fix would be to include the entire cluster info in - /// the conf change log entry and overwrite `ctx.cluster_info` when - /// switching - fn switch_config(&self, conf_change: ConfChange) -> Option<(Vec, String, bool)> { - let node_id = conf_change.node_id; - #[allow(clippy::explicit_auto_deref)] // Avoid compiler complaint about `Dashmap::Ref` type - let (modified, fallback_info) = match conf_change.change_type() { - ConfChangeType::Add | ConfChangeType::AddLearner => { - let is_learner = matches!(conf_change.change_type(), ConfChangeType::AddLearner); - let member = Member::new(node_id, "", conf_change.address.clone(), [], is_learner); - self.lst.insert(node_id, is_learner); - _ = self.ctx.sync_events.insert(node_id, Arc::new(Event::new())); - let _ig = self.ctx.curp_storage.put_member(&member); - let m = self.ctx.cluster_info.insert(member); - (m.is_none(), Some((vec![], String::new(), is_learner))) - } - ConfChangeType::Remove => { - self.lst.remove(node_id); - _ = self.ctx.sync_events.remove(&node_id); - _ = self.ctx.connects.remove(&node_id); - let _ig = self.ctx.curp_storage.remove_member(node_id); - // The member may not exist because the node could be restarted - // and has fetched the newest cluster info - // - // TODO: Review all the usages of `ctx.cluster_info` to ensure all - // the assertions are correct. 
- let member_opt = self.ctx.cluster_info.remove(&node_id); - ( - true, - member_opt.map(|m| (m.peer_urls, m.name, m.is_learner)), - ) - } - ConfChangeType::Update => { - let old_addrs = self - .ctx - .cluster_info - .update(&node_id, conf_change.address.clone()); - let m = self.ctx.cluster_info.get(&node_id).unwrap_or_else(|| { - unreachable!("the member should exist after update"); - }); - let _ig = self.ctx.curp_storage.put_member(&*m); - ( - old_addrs != conf_change.address, - Some((old_addrs, String::new(), false)), - ) - } - ConfChangeType::Promote => { - self.lst.promote(node_id); - let modified = self.ctx.cluster_info.promote(node_id); - let m = self.ctx.cluster_info.get(&node_id).unwrap_or_else(|| { - unreachable!("the member should exist after promote"); - }); - let _ig = self.ctx.curp_storage.put_member(&*m); - (modified, Some((vec![], String::new(), false))) - } - }; - if modified { - self.ctx.cluster_info.cluster_version_update(); - } - self.ctx - .change_tx - .send(conf_change) - .unwrap_or_else(|_e| unreachable!("change_rx should not be dropped")); - // TODO: We could wrap lst inside a role checking to prevent accidental lst - // mutation - if self.is_leader() && self.lst.get_transferee().is_some() { - self.lst.reset_transferee(); - } - fallback_info - } - /// Notify sync events fn notify_sync_events(&self, log: &Log) { self.ctx.sync_events.iter().for_each(|e| { diff --git a/crates/curp/src/server/raw_curp/state.rs b/crates/curp/src/server/raw_curp/state.rs index c26a75edb..722187d0f 100644 --- a/crates/curp/src/server/raw_curp/state.rs +++ b/crates/curp/src/server/raw_curp/state.rs @@ -75,17 +75,6 @@ impl Default for FollowerStatus { } } -impl FollowerStatus { - /// Create a new `FollowerStatus` - fn new(next_index: LogIndex, match_index: LogIndex, is_learner: bool) -> Self { - Self { - next_index, - match_index, - is_learner, - } - } -} - /// Additional state for the leader, all volatile #[derive(Debug)] pub(super) struct LeaderState { @@ -170,26 
+159,6 @@ impl LeaderState { } } - /// Get statuses for all servers - pub(super) fn get_all_statuses(&self) -> HashMap { - self.statuses - .iter() - .map(|e| (*e.key(), *e.value())) - .collect() - } - - /// insert new status for id - pub(super) fn insert(&self, id: ServerId, is_learner: bool) { - _ = self - .statuses - .insert(id, FollowerStatus::new(1, 0, is_learner)); - } - - /// Remove a status - pub(super) fn remove(&self, id: ServerId) { - _ = self.statuses.remove(&id); - } - /// Get status for a server fn get_status(&self, id: ServerId) -> Option> { self.statuses.get(&id) @@ -238,20 +207,6 @@ impl LeaderState { self.statuses.iter() } - /// Promote a learner to voter - pub(super) fn promote(&self, node_id: ServerId) { - if let Some(mut s) = self.statuses.get_mut(&node_id) { - s.is_learner = false; - } - } - - /// Demote a voter to learner - pub(super) fn demote(&self, node_id: ServerId) { - if let Some(mut s) = self.statuses.get_mut(&node_id) { - s.is_learner = true; - } - } - /// Get transferee pub(super) fn get_transferee(&self) -> Option { let val = self.leader_transferee.load(Ordering::Acquire); diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 003227815..154595d1c 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -25,6 +25,7 @@ impl RawCurp { self.st.read().role } + #[cfg(ignore)] fn contains(&self, id: ServerId) -> bool { self.cluster().all_members().contains_key(&id) && self.ctx.sync_events.contains_key(&id) @@ -121,6 +122,7 @@ impl RawCurp { log_w.push(st_r.term, propose_id, cmd).index } + #[cfg(ignore)] pub(crate) fn check_learner(&self, node_id: ServerId, is_learner: bool) -> bool { self.lst .get_all_statuses() @@ -682,6 +684,7 @@ fn is_synced_should_return_true_when_followers_caught_up_with_leader() { assert!(curp.is_synced(s2_id)); } +#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] #[test] fn 
add_node_should_add_new_node_to_curp() { @@ -709,6 +712,7 @@ fn add_node_should_add_new_node_to_curp() { ); } +#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] #[test] fn add_learner_node_and_promote_should_success() { @@ -730,6 +734,7 @@ fn add_learner_node_and_promote_should_success() { assert!(curp.check_learner(1, true)); } +#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] #[test] fn add_exists_node_should_return_node_already_exists_error() { @@ -745,6 +750,7 @@ fn add_exists_node_should_return_node_already_exists_error() { assert!(error_match); } +#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] #[test] fn remove_node_should_remove_node_from_curp() { @@ -770,6 +776,7 @@ fn remove_node_should_remove_node_from_curp() { ); } +#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] #[test] fn remove_non_exists_node_should_return_node_not_exists_error() { @@ -780,6 +787,7 @@ fn remove_non_exists_node_should_return_node_not_exists_error() { assert!(matches!(resp, Err(CurpError::NodeNotExists(())))); } +#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] #[test] fn update_node_should_update_the_address_of_node() { @@ -821,6 +829,7 @@ fn update_node_should_update_the_address_of_node() { ); } +#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] #[test] fn leader_handle_propose_conf_change() { @@ -839,6 +848,7 @@ fn leader_handle_propose_conf_change() { .unwrap(); } +#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] #[test] fn follower_handle_propose_conf_change() { @@ -865,6 +875,7 @@ fn follower_handle_propose_conf_change() { )); } +#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] #[test] fn leader_handle_move_leader() { @@ -900,6 +911,7 @@ fn follower_handle_move_leader() { assert!(matches!(res, Err(CurpError::Redirect(_)))); } +#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] #[test] fn 
leader_will_reset_transferee_after_remove_node() { diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index 019440e5f..97aad53d0 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -1,25 +1,23 @@ //! Integration test for the curp server -use std::{sync::Arc, time::Duration}; +use std::time::Duration; use clippy_utilities::NumericCast; use curp::{ client::{ClientApi, ClientBuilder}, - members::ClusterInfo, - rpc::{ConfChange, CurpError}, + rpc::CurpError, }; use curp_test_utils::{ - init_logger, sleep_millis, sleep_secs, - test_cmd::{TestCommand, TestCommandResult, TestCommandType}, + init_logger, + test_cmd::{TestCommand, TestCommandResult}, }; use futures::stream::FuturesUnordered; use madsim::rand::{thread_rng, Rng}; use test_macros::abort_on_panic; -use tokio::net::TcpListener; use tokio_stream::StreamExt; -use utils::{config::ClientConfig, timestamp}; +use utils::config::ClientConfig; -use crate::common::curp_group::{CurpGroup, FetchClusterRequest, DEFAULT_SHUTDOWN_TIMEOUT}; +use crate::common::curp_group::{CurpGroup, DEFAULT_SHUTDOWN_TIMEOUT}; #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] @@ -333,6 +331,7 @@ async fn shutdown_rpc_should_shutdown_the_cluster() { } } +#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn propose_add_node_should_success() { @@ -350,6 +349,7 @@ async fn propose_add_node_should_success() { assert!(members.iter().any(|m| m.id == node_id)); } +#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn propose_remove_follower_should_success() { @@ -379,6 +379,7 @@ async fn propose_remove_follower_should_success() { .unwrap(); } +#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn propose_remove_leader_should_success() { @@ -408,6 +409,7 @@ async fn propose_remove_leader_should_success() { .unwrap(); } 
+#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn propose_update_node_should_success() { @@ -445,6 +447,7 @@ async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_leader() .await; } +#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn propose_conf_change_to_follower() { @@ -468,6 +471,7 @@ async fn propose_conf_change_to_follower() { assert!(member.is_some_and(|m| m.peer_urls == ["new_addr"])); } +#[cfg(ignore)] // TODO: Rewrite this tests async fn check_new_node(is_learner: bool) { init_logger(); @@ -536,18 +540,21 @@ async fn check_new_node(is_learner: bool) { .unwrap(); } +#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn new_follower_node_should_apply_old_cluster_logs() { check_new_node(false).await; } +#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn new_learner_node_should_apply_old_cluster_logs() { check_new_node(true).await; } +#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_cluster() { @@ -575,6 +582,7 @@ async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_cluster( .await; } +#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn propose_conf_change_rpc_should_work_when_client_has_wrong_cluster() { @@ -603,6 +611,7 @@ async fn propose_conf_change_rpc_should_work_when_client_has_wrong_cluster() { .await; } +#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn fetch_read_state_rpc_should_work_when_client_has_wrong_cluster() { diff --git a/crates/xline/src/server/auth_wrapper.rs b/crates/xline/src/server/auth_wrapper.rs index 03647c8f7..37b4b25b4 100644 
--- a/crates/xline/src/server/auth_wrapper.rs +++ b/crates/xline/src/server/auth_wrapper.rs @@ -6,9 +6,8 @@ use curp::{ AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, FetchClusterRequest, FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, LeaseKeepAliveMsg, MoveLeaderRequest, - MoveLeaderResponse, OpResponse, ProposeConfChangeRequest, ProposeConfChangeResponse, - ProposeRequest, Protocol, PublishRequest, PublishResponse, ReadIndexRequest, - ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, + MoveLeaderResponse, OpResponse, ProposeRequest, Protocol, PublishRequest, PublishResponse, + ReadIndexRequest, ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, RemoveMemberRequest, RemoveMemberResponse, ShutdownRequest, ShutdownResponse, }, @@ -83,13 +82,6 @@ impl Protocol for AuthWrapper { self.curp_server.shutdown(request).await } - async fn propose_conf_change( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - self.curp_server.propose_conf_change(request).await - } - async fn publish( &self, request: tonic::Request, diff --git a/crates/xline/src/server/cluster_server.rs b/crates/xline/src/server/cluster_server.rs index efe9bf92f..7097b3485 100644 --- a/crates/xline/src/server/cluster_server.rs +++ b/crates/xline/src/server/cluster_server.rs @@ -1,17 +1,11 @@ +// FIXME: implement cluster server +#![allow(unused, clippy::unimplemented)] + use std::sync::Arc; -use curp::{ - members::ClusterInfo, - rpc::{ - ConfChange, - ConfChangeType::{Add, AddLearner, Promote, Remove, Update}, - }, -}; -use itertools::Itertools; use tonic::{Request, Response, Status}; -use utils::timestamp; use xlineapi::{ - command::CurpClient, Cluster, Member, MemberAddRequest, MemberAddResponse, MemberListRequest, + command::CurpClient, Cluster, MemberAddRequest, MemberAddResponse, MemberListRequest, MemberListResponse, 
MemberPromoteRequest, MemberPromoteResponse, MemberRemoveRequest, MemberRemoveResponse, MemberUpdateRequest, MemberUpdateResponse, }; @@ -31,23 +25,6 @@ impl ClusterServer { pub(crate) fn new(client: Arc, header_gen: Arc) -> Self { Self { client, header_gen } } - - /// Send propose conf change request - async fn propose_conf_change(&self, changes: Vec) -> Result, Status> { - Ok(self - .client - .propose_conf_change(changes) - .await? - .into_iter() - .map(|member| Member { - id: member.id, - name: member.name.clone(), - peer_ur_ls: member.peer_urls.clone(), - client_ur_ls: member.client_urls.clone(), - is_learner: member.is_learner, - }) - .collect()) - } } #[tonic::async_trait] @@ -56,107 +33,34 @@ impl Cluster for ClusterServer { &self, request: Request, ) -> Result, Status> { - let req = request.into_inner(); - let change_type = if req.is_learner { - i32::from(AddLearner) - } else { - i32::from(Add) - }; - let peer_url_ls = req.peer_ur_ls.into_iter().sorted().collect_vec(); - // calculate node id based on addresses and current timestamp - let node_id = ClusterInfo::calculate_member_id(peer_url_ls.clone(), "", Some(timestamp())); - let members = self - .propose_conf_change(vec![ConfChange { - change_type, - node_id, - address: peer_url_ls, - }]) - .await?; - let resp = MemberAddResponse { - header: Some(self.header_gen.gen_header()), - member: members.iter().find(|m| m.id == node_id).cloned(), - members, - }; - Ok(Response::new(resp)) + unimplemented!() } async fn member_remove( &self, request: Request, ) -> Result, Status> { - let req = request.into_inner(); - let members = self - .propose_conf_change(vec![ConfChange { - change_type: i32::from(Remove), - node_id: req.id, - address: vec![], - }]) - .await?; - let resp = MemberRemoveResponse { - header: Some(self.header_gen.gen_header()), - members, - }; - Ok(Response::new(resp)) + unimplemented!() } async fn member_update( &self, request: Request, ) -> Result, Status> { - let req = request.into_inner(); - let 
members = self - .propose_conf_change(vec![ConfChange { - change_type: i32::from(Update), - node_id: req.id, - address: req.peer_ur_ls, - }]) - .await?; - let resp = MemberUpdateResponse { - header: Some(self.header_gen.gen_header()), - members, - }; - Ok(Response::new(resp)) + unimplemented!() } async fn member_list( &self, request: Request, ) -> Result, Status> { - let req = request.into_inner(); - let header = self.header_gen.gen_header(); - let members = self.client.fetch_cluster(req.linearizable).await?.members; - let resp = MemberListResponse { - header: Some(header), - members: members - .into_iter() - .map(|member| Member { - id: member.id, - name: member.name, - peer_ur_ls: member.peer_urls, - client_ur_ls: member.client_urls, - is_learner: member.is_learner, - }) - .collect(), - }; - Ok(Response::new(resp)) + unimplemented!() } async fn member_promote( &self, request: Request, ) -> Result, Status> { - let req = request.into_inner(); - let members = self - .propose_conf_change(vec![ConfChange { - change_type: i32::from(Promote), - node_id: req.id, - address: vec![], - }]) - .await?; - let resp = MemberPromoteResponse { - header: Some(self.header_gen.gen_header()), - members, - }; - Ok(Response::new(resp)) + unimplemented!() } } From 76b201c92591a61230f72dece39704d49357bf5b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 10 Sep 2024 09:52:15 +0800 Subject: [PATCH 160/322] feat: add/remove member in ConnectApi Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/connect.rs | 12 +++++ crates/curp/src/client/keep_alive.rs | 25 ++++++++-- crates/curp/src/client/retry/mod.rs | 12 +++++ crates/curp/src/client/unary/mod.rs | 28 ++++++++++- crates/curp/src/rpc/connect/mod.rs | 73 +++++++++++++++++++++++++--- crates/curp/src/rpc/reconnect.rs | 29 +++++++++-- 6 files changed, 163 insertions(+), 16 deletions(-) diff --git a/crates/curp/src/client/connect.rs 
b/crates/curp/src/client/connect.rs index f3443ddc7..611ff0103 100644 --- a/crates/curp/src/client/connect.rs +++ b/crates/curp/src/client/connect.rs @@ -83,6 +83,12 @@ pub trait ClientApi { /// Remove some learners from the cluster. async fn remove_learner(&self, ids: Vec) -> Result<(), Self::Error>; + + /// Add some members to the cluster. + async fn add_member(&self, ids: Vec) -> Result<(), Self::Error>; + + /// Add some members to the cluster. + async fn remove_member(&self, ids: Vec) -> Result<(), Self::Error>; } /// This trait override some unrepeatable methods in ClientApi, and a client with this trait will be able to retry. @@ -131,4 +137,10 @@ pub(crate) trait RepeatableClientApi { /// Remove some learners from the cluster. async fn remove_learner(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error>; + + /// Add some members to the cluster. + async fn add_member(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error>; + + /// Remove some members from the cluster. + async fn remove_member(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error>; } diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index aafbb1299..9955023e5 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -154,12 +154,13 @@ mod tests { use crate::rpc::{ connect::{ConnectApi, MockConnectApi}, - AddLearnerRequest, AddLearnerResponse, CurpError, FetchClusterRequest, - FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, + AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, CurpError, + FetchClusterRequest, FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, Member, MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeId, ProposeRequest, ProposeResponse, PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, - RemoveLearnerResponse, 
ResponseOp, ShutdownRequest, ShutdownResponse, SyncedResponse, + RemoveLearnerResponse, RemoveMemberRequest, RemoveMemberResponse, ResponseOp, + ShutdownRequest, ShutdownResponse, SyncedResponse, }; struct MockedStreamConnectApi { @@ -292,6 +293,24 @@ mod tests { ) -> Result, CurpError> { unreachable!("please use MockedConnectApi") } + + /// Add a learner to the cluster. + async fn add_member( + &self, + request: AddMemberRequest, + timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } + + /// Remove a learner from the cluster. + async fn remove_member( + &self, + request: RemoveMemberRequest, + timeout: Duration, + ) -> Result, CurpError> { + unreachable!("please use MockedConnectApi") + } } /// Create mocked stream connects diff --git a/crates/curp/src/client/retry/mod.rs b/crates/curp/src/client/retry/mod.rs index d2c985036..07221ad9c 100644 --- a/crates/curp/src/client/retry/mod.rs +++ b/crates/curp/src/client/retry/mod.rs @@ -468,6 +468,18 @@ where self.retry::<_, _>(|client, ctx| client.remove_learner(ids.clone(), ctx)) .await } + + /// Add some members to the cluster. + async fn add_member(&self, ids: Vec) -> Result<(), Self::Error> { + self.retry::<_, _>(|client, ctx| client.add_member(ids.clone(), ctx)) + .await + } + + /// Add some members to the cluster. 
+ async fn remove_member(&self, ids: Vec) -> Result<(), Self::Error> { + self.retry::<_, _>(|client, ctx| client.remove_member(ids.clone(), ctx)) + .await + } } /// Tests for backoff diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index bafeb2dc3..bc4f08579 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -15,8 +15,8 @@ use super::{ use crate::{ members::ServerId, rpc::{ - AddLearnerRequest, CurpError, FetchReadStateRequest, MoveLeaderRequest, PublishRequest, - ReadState, RemoveLearnerRequest, ShutdownRequest, + AddLearnerRequest, AddMemberRequest, CurpError, FetchReadStateRequest, MoveLeaderRequest, + PublishRequest, ReadState, RemoveLearnerRequest, RemoveMemberRequest, ShutdownRequest, }, }; @@ -155,4 +155,28 @@ impl RepeatableClientApi for Unary { Ok(()) } + + /// Add some members to the cluster. + async fn add_member(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error> { + let req = AddMemberRequest { node_ids: ids }; + let timeout = self.config.wait_synced_timeout(); + let _ig = ctx + .cluster_state() + .map_leader(|conn| async move { conn.add_member(req, timeout).await }) + .await?; + + Ok(()) + } + + /// Add some members to the cluster. 
+ async fn remove_member(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error> { + let req = RemoveMemberRequest { node_ids: ids }; + let timeout = self.config.wait_synced_timeout(); + let _ig = ctx + .cluster_state() + .map_leader(|conn| async move { conn.remove_member(req, timeout).await }) + .await?; + + Ok(()) + } } diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index 665bcea64..7d88781b2 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -37,12 +37,13 @@ use crate::{ commandpb::protocol_client::ProtocolClient, inner_messagepb::inner_protocol_client::InnerProtocolClient, }, - AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchClusterRequest, - FetchClusterResponse, FetchReadStateRequest, FetchReadStateResponse, - InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, - MoveLeaderResponse, ProposeRequest, Protocol, PublishRequest, PublishResponse, - ShutdownRequest, ShutdownResponse, TriggerShutdownRequest, TryBecomeLeaderNowRequest, - VoteRequest, VoteResponse, + AddMemberRequest, AddMemberResponse, AppendEntriesRequest, AppendEntriesResponse, + CurpError, FetchClusterRequest, FetchClusterResponse, FetchReadStateRequest, + FetchReadStateResponse, InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, + MoveLeaderRequest, MoveLeaderResponse, ProposeRequest, Protocol, PublishRequest, + PublishResponse, RemoveMemberRequest, RemoveMemberResponse, ShutdownRequest, + ShutdownResponse, TriggerShutdownRequest, TryBecomeLeaderNowRequest, VoteRequest, + VoteResponse, }, server::StreamingProtocol, snapshot::Snapshot, @@ -251,6 +252,20 @@ pub(crate) trait ConnectApi: Send + Sync + 'static { request: RemoveLearnerRequest, timeout: Duration, ) -> Result, CurpError>; + + /// Add a learner to the cluster. 
+ async fn add_member( + &self, + request: AddMemberRequest, + timeout: Duration, + ) -> Result, CurpError>; + + /// Remove a learner from the cluster. + async fn remove_member( + &self, + request: RemoveMemberRequest, + timeout: Duration, + ) -> Result, CurpError>; } /// Inner Connect interface among different servers @@ -563,6 +578,29 @@ impl ConnectApi for Connect> { req.metadata_mut().inject_current(); with_timeout!(timeout, client.remove_learner(req)).map_err(Into::into) } + + async fn add_member( + &self, + request: AddMemberRequest, + timeout: Duration, + ) -> Result, CurpError> { + let mut client = self.rpc_connect.clone(); + let mut req = tonic::Request::new(request); + req.metadata_mut().inject_current(); + with_timeout!(timeout, client.add_member(req)).map_err(Into::into) + } + + /// Remove a learner from the cluster. + async fn remove_member( + &self, + request: RemoveMemberRequest, + timeout: Duration, + ) -> Result, CurpError> { + let mut client = self.rpc_connect.clone(); + let mut req = tonic::Request::new(request); + req.metadata_mut().inject_current(); + with_timeout!(timeout, client.remove_member(req)).map_err(Into::into) + } } #[allow(clippy::let_and_return)] // for metrics @@ -875,6 +913,29 @@ where req.metadata_mut().inject_current(); self.server.remove_learner(req).await.map_err(Into::into) } + + async fn add_member( + &self, + request: AddMemberRequest, + _timeout: Duration, + ) -> Result, CurpError> { + let mut req = tonic::Request::new(request); + req.metadata_mut().inject_bypassed(); + req.metadata_mut().inject_current(); + self.server.add_member(req).await.map_err(Into::into) + } + + /// Remove a learner from the cluster. 
+ async fn remove_member( + &self, + request: RemoveMemberRequest, + _timeout: Duration, + ) -> Result, CurpError> { + let mut req = tonic::Request::new(request); + req.metadata_mut().inject_bypassed(); + req.metadata_mut().inject_current(); + self.server.remove_member(req).await.map_err(Into::into) + } } /// Generate heartbeat stream diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs index 18c2346fc..fb2cc713c 100644 --- a/crates/curp/src/rpc/reconnect.rs +++ b/crates/curp/src/rpc/reconnect.rs @@ -7,11 +7,12 @@ use futures::Stream; use crate::{ members::ServerId, rpc::{ - connect::ConnectApi, AddLearnerRequest, AddLearnerResponse, CurpError, FetchClusterRequest, - FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, - FetchReadStateRequest, FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, - OpResponse, ProposeRequest, PublishRequest, PublishResponse, ReadIndexResponse, - RecordRequest, RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, + connect::ConnectApi, AddLearnerRequest, AddLearnerResponse, AddMemberRequest, + AddMemberResponse, CurpError, FetchClusterRequest, FetchClusterResponse, + FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, + FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeRequest, + PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, RecordResponse, + RemoveLearnerRequest, RemoveLearnerResponse, RemoveMemberRequest, RemoveMemberResponse, ShutdownRequest, ShutdownResponse, }, }; @@ -200,4 +201,22 @@ impl ConnectApi for Reconnect { ) -> Result, CurpError> { execute_with_reconnect!(self, ConnectApi::remove_learner, request, timeout) } + + /// Add a learner to the cluster. + async fn add_member( + &self, + request: AddMemberRequest, + timeout: Duration, + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::add_member, request, timeout) + } + + /// Remove a learner from the cluster. 
+ async fn remove_member( + &self, + request: RemoveMemberRequest, + timeout: Duration, + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::remove_member, request, timeout) + } } From 06155e92b4c524c96bd2ddad7ef82acea2eb727a Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 11 Sep 2024 17:58:12 +0800 Subject: [PATCH 161/322] feat: fixup curp client fetch implementation Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/connect.rs | 27 +-- crates/curp/src/client/fetch.rs | 311 ++++++---------------------- crates/curp/src/client/mod.rs | 23 +- crates/curp/src/client/retry/mod.rs | 4 +- 4 files changed, 86 insertions(+), 279 deletions(-) diff --git a/crates/curp/src/client/connect.rs b/crates/curp/src/client/connect.rs index 611ff0103..e0d9e0c29 100644 --- a/crates/curp/src/client/connect.rs +++ b/crates/curp/src/client/connect.rs @@ -1,10 +1,9 @@ use async_trait::async_trait; use curp_external_api::cmd::Command; -use tracing::debug; use crate::{ members::ServerId, - rpc::{FetchClusterResponse, ReadState}, + rpc::{FetchMembershipResponse, ReadState}, }; use super::retry::Context; @@ -55,27 +54,17 @@ pub trait ClientApi { /// know who the leader is.) 
/// /// Note: The fetched cluster may still be outdated if `linearizable` is false - async fn fetch_cluster(&self, linearizable: bool) -> Result; + async fn fetch_cluster( + &self, + linearizable: bool, + ) -> Result; /// Fetch leader id #[inline] async fn fetch_leader_id(&self, linearizable: bool) -> Result { - if linearizable { - let resp = self.fetch_cluster(true).await?; - return Ok(resp - .leader_id - .unwrap_or_else(|| { - unreachable!("linearizable fetch cluster should return a leader id") - }) - .into()); - } - let resp = self.fetch_cluster(false).await?; - if let Some(id) = resp.leader_id { - return Ok(id.into()); - } - debug!("no leader id in FetchClusterResponse, try to send linearizable request"); - // fallback to linearizable fetch - self.fetch_leader_id(true).await + self.fetch_cluster(linearizable) + .await + .map(|resp| resp.leader_id) } /// Add some learners to the cluster. diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 4515d9370..b28c4d0ae 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -9,10 +9,7 @@ use utils::parking_lot_lock::RwLockMap; use crate::{ quorum, - rpc::{ - self, connect::ConnectApi, CurpError, FetchClusterRequest, FetchClusterResponse, - FetchMembershipRequest, FetchMembershipResponse, - }, + rpc::{self, connect::ConnectApi, CurpError, FetchMembershipRequest, FetchMembershipResponse}, }; use super::cluster_state::{ClusterState, ClusterStateReady, ForEachServer}; @@ -23,37 +20,13 @@ use super::config::Config; /// This is used to build a boxed closure that handles the `FetchClusterResponse` and returns /// new connections. 
pub(super) trait ConnectToCluster: - Fn(&FetchClusterResponse) -> HashMap> + Send + Sync + 'static + Fn(&FetchMembershipResponse) -> HashMap> + Send + Sync + 'static { /// Clone the value fn clone_box(&self) -> Box; } impl ConnectToCluster for T -where - T: Fn(&FetchClusterResponse) -> HashMap> - + Clone - + Send - + Sync - + 'static, -{ - fn clone_box(&self) -> Box { - Box::new(self.clone()) - } -} - -/// Connect to cluster -/// -/// This is used to build a boxed closure that handles the `FetchClusterResponse` and returns -/// new connections. -pub(super) trait ConnectToClusterNew: - Fn(&FetchMembershipResponse) -> HashMap> + Send + Sync + 'static -{ - /// Clone the value - fn clone_box(&self) -> Box; -} - -impl ConnectToClusterNew for T where T: Fn(&FetchMembershipResponse) -> HashMap> + Clone @@ -61,7 +34,7 @@ where + Sync + 'static, { - fn clone_box(&self) -> Box { + fn clone_box(&self) -> Box { Box::new(self.clone()) } } @@ -92,12 +65,6 @@ impl Fetch { } } - #[allow(clippy::unimplemented)] // FIXME: implement this - /// Creates a new `Fetch` - pub(crate) fn new_membership(timeout: Duration, connect_to: C) -> Self { - unimplemented!() - } - #[cfg(test)] /// Creates a new `Fetch` fetch disabled pub(crate) fn new_disable() -> Self { @@ -111,43 +78,17 @@ impl Fetch { pub(crate) async fn fetch_cluster( &self, state: impl ForEachServer, - ) -> Result<(ClusterStateReady, FetchClusterResponse), CurpError> { - let resp = self - .pre_fetch(&state) - .await - .ok_or(CurpError::internal("cluster not available"))?; - let new_connects = (self.connect_to)(&resp); - let new_state = ClusterStateReady::new( - resp.leader_id - .unwrap_or_else(|| unreachable!("leader id should be Some")) - .into(), - resp.term, - resp.cluster_version, - new_connects, - ); - - if self.fetch_term(&new_state).await { - return Ok((new_state, resp)); - } - - Err(CurpError::internal("cluster not available")) - } - - #[allow(clippy::diverging_sub_expression, clippy::todo)] // FIXME: implement - /// 
Fetch cluster and updates the current state - pub(crate) async fn fetch_membership( - &self, - state: impl ForEachServer, ) -> Result<(ClusterStateReady, FetchMembershipResponse), CurpError> { let resp = self - .pre_fetch_membership(&state) + .pre_fetch(&state) .await .ok_or(CurpError::internal("cluster not available"))?; + let connects = (self.connect_to)(&resp); let new_state = ClusterStateReady::new_membership( resp.leader_id, resp.term, - todo!("call connect"), - resp.into_membership(), + connects, + resp.clone().into_membership(), ); if self.fetch_term(&new_state).await { return Ok((new_state, resp)); @@ -163,8 +104,7 @@ impl Fetch { let quorum = state.get_quorum(quorum); state .for_each_server(|c| async move { - c.fetch_cluster(FetchClusterRequest { linearizable: true }, timeout) - .await + c.fetch_membership(FetchMembershipRequest {}, timeout).await }) .filter_map(|r| future::ready(r.ok())) .map(Response::into_inner) @@ -177,30 +117,7 @@ impl Fetch { /// Prefetch, send fetch cluster request to the cluster and get the /// config with the greatest quorum. - async fn pre_fetch(&self, state: &impl ForEachServer) -> Option { - let timeout = self.timeout; - let requests = state.for_each_server(|c| async move { - c.fetch_cluster(FetchClusterRequest { linearizable: true }, timeout) - .await - }); - let responses: Vec<_> = requests - .filter_map(|r| future::ready(r.ok())) - .map(Response::into_inner) - .collect() - .await; - responses - .into_iter() - .filter(|resp| resp.leader_id.is_some()) - .filter(|resp| !resp.members.is_empty()) - .max_by(|x, y| x.term.cmp(&y.term)) - } - - /// Prefetch, send fetch cluster request to the cluster and get the - /// config with the greatest quorum. 
- async fn pre_fetch_membership( - &self, - state: &impl ForEachServer, - ) -> Option { + async fn pre_fetch(&self, state: &impl ForEachServer) -> Option { let timeout = self.timeout; let requests = state.for_each_server(|c| async move { c.fetch_membership(FetchMembershipRequest {}, timeout).await @@ -234,7 +151,7 @@ mod test { use crate::{ client::{cluster_state::ForEachServer, config::Config, tests::init_mocked_connects}, - rpc::{connect::ConnectApi, CurpError, FetchClusterResponse, Member}, + rpc::{self, connect::ConnectApi, CurpError, FetchMembershipResponse, Member, Node}, }; use super::Fetch; @@ -253,34 +170,43 @@ mod test { Fetch::new(Duration::from_secs(0), move |_| connects.clone()) } + fn build_membership_resp( + leader_id: Option, + term: u64, + members: impl IntoIterator, + ) -> Result, CurpError> { + let leader_id = leader_id.ok_or(CurpError::leader_transfer("no current leader"))?; + + let members: Vec<_> = members.into_iter().collect(); + let nodes: Vec = members + .clone() + .into_iter() + .map(|node_id| Node { + node_id, + addr: String::new(), + }) + .collect(); + let qs = rpc::QuorumSet { set: members }; + + let resp = FetchMembershipResponse { + members: vec![qs], + nodes, + term, + leader_id, + }; + Ok(tonic::Response::new(resp)) + } + #[traced_test] #[tokio::test(flavor = "multi_thread")] async fn test_unary_fetch_clusters_serializable() { let connects = init_mocked_connects(3, |_id, conn| { - conn.expect_fetch_cluster().returning(|_req, _timeout| { - Ok(tonic::Response::new(FetchClusterResponse { - leader_id: Some(0.into()), - term: 1, - cluster_id: 123, - members: vec![ - Member::new(0, "S0", vec!["A0".to_owned()], [], false), - Member::new(1, "S1", vec!["A1".to_owned()], [], false), - Member::new(2, "S2", vec!["A2".to_owned()], [], false), - ], - cluster_version: 1, - })) - }); + conn.expect_fetch_membership() + .returning(|_req, _timeout| build_membership_resp(Some(0), 1, vec![0, 1, 2])); }); let fetch = init_fetch(connects.clone()); let (_, 
res) = fetch.fetch_cluster(connects).await.unwrap(); - assert_eq!( - res.into_peer_urls(), - HashMap::from([ - (0, vec!["A0".to_owned()]), - (1, vec!["A1".to_owned()]), - (2, vec!["A2".to_owned()]) - ]) - ); + assert_eq!(res.members[0].set, vec![0, 1, 2]); } #[traced_test] @@ -288,84 +214,25 @@ mod test { async fn test_unary_fetch_clusters_linearizable() { let connects = init_mocked_connects(5, |id, conn| { match id { - 0 => { - conn.expect_fetch_cluster().returning(|_req, _timeout| { - let resp = FetchClusterResponse { - leader_id: Some(0.into()), - term: 2, - cluster_id: 123, - members: vec![ - Member::new(0, "S0", vec!["A0".to_owned()], [], false), - Member::new(1, "S1", vec!["A1".to_owned()], [], false), - Member::new(2, "S2", vec!["A2".to_owned()], [], false), - Member::new(3, "S3", vec!["A3".to_owned()], [], false), - Member::new(4, "S4", vec!["A4".to_owned()], [], false), - ], - cluster_version: 1, - }; - Ok(tonic::Response::new(resp)) - }); - } - 1 | 4 => { - conn.expect_fetch_cluster().returning(|_req, _timeout| { - let resp = FetchClusterResponse { - leader_id: Some(0.into()), - term: 2, - cluster_id: 123, - members: vec![], // linearizable read from follower returns empty members - cluster_version: 1, - }; - - Ok(tonic::Response::new(resp)) - }); - } - 2 => { - conn.expect_fetch_cluster().returning(|_req, _timeout| { - let resp = FetchClusterResponse { - leader_id: None, - term: 23, // abnormal term - cluster_id: 123, - members: vec![], - cluster_version: 1, - }; - - Ok(tonic::Response::new(resp)) - }); - } - 3 => { - conn.expect_fetch_cluster().returning(|_req, _timeout| { - let resp = FetchClusterResponse { - leader_id: Some(3.into()), // imagine this node is a old leader - term: 1, // with the old term - cluster_id: 123, - members: vec![ - Member::new(0, "S0", vec!["B0".to_owned()], [], false), - Member::new(1, "S1", vec!["B1".to_owned()], [], false), - Member::new(2, "S2", vec!["B2".to_owned()], [], false), - Member::new(3, "S3", 
vec!["B3".to_owned()], [], false), - Member::new(4, "S4", vec!["B4".to_owned()], [], false), - ], - cluster_version: 1, - }; - - Ok(tonic::Response::new(resp)) - }); - } + 0 => conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(Some(0), 2, vec![0, 1, 2, 3, 4]) + }), + 1 | 4 => conn + .expect_fetch_membership() + .returning(|_req, _timeout| build_membership_resp(Some(0), 2, vec![])), + 2 => conn + .expect_fetch_membership() + .returning(|_req, _timeout| build_membership_resp(None, 23, vec![])), + 3 => conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(Some(3), 1, vec![1, 2, 3, 4]) + }), _ => unreachable!("there are only 5 nodes"), }; }); let fetch = init_fetch(connects.clone()); let (_, res) = fetch.fetch_cluster(connects).await.unwrap(); - assert_eq!( - res.into_peer_urls(), - HashMap::from([ - (0, vec!["A0".to_owned()]), - (1, vec!["A1".to_owned()]), - (2, vec!["A2".to_owned()]), - (3, vec!["A3".to_owned()]), - (4, vec!["A4".to_owned()]) - ]) - ); + + assert_eq!(res.members[0].set, vec![0, 1, 2, 3, 4]); } #[traced_test] @@ -374,76 +241,26 @@ mod test { let connects = init_mocked_connects(5, |id, conn| { match id { 0 => { - conn.expect_fetch_cluster().returning(|_req, _timeout| { - let resp = FetchClusterResponse { - leader_id: Some(0.into()), - term: 2, - cluster_id: 123, - members: vec![ - Member::new(0, "S0", vec!["A0".to_owned()], [], false), - Member::new(1, "S1", vec!["A1".to_owned()], [], false), - Member::new(2, "S2", vec!["A2".to_owned()], [], false), - Member::new(3, "S3", vec!["A3".to_owned()], [], false), - Member::new(4, "S4", vec!["A4".to_owned()], [], false), - ], - cluster_version: 1, - }; - Ok(tonic::Response::new(resp)) + conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(Some(0), 2, vec![0, 1, 2, 3, 4]) }); } 1 => { - conn.expect_fetch_cluster().returning(|_req, _timeout| { - let resp = FetchClusterResponse { - leader_id: Some(0.into()), - term: 2, 
- cluster_id: 123, - members: vec![], // linearizable read from follower returns empty members - cluster_version: 1, - }; - Ok(tonic::Response::new(resp)) - }); + conn.expect_fetch_membership() + .returning(|_req, _timeout| build_membership_resp(Some(0), 2, vec![])); } 2 => { - conn.expect_fetch_cluster().returning(|_req, _timeout| { - let resp = FetchClusterResponse { - leader_id: None, // imagine this node is a disconnected candidate - term: 23, // with a high term - cluster_id: 123, - members: vec![], - cluster_version: 1, - }; - Ok(tonic::Response::new(resp)) - }); + conn.expect_fetch_membership() + .returning(|_req, _timeout| build_membership_resp(None, 23, vec![])); } 3 => { - conn.expect_fetch_cluster().returning(|_req, _timeout| { - let resp = FetchClusterResponse { - leader_id: Some(3.into()), // imagine this node is a old leader - term: 1, // with the old term - cluster_id: 123, - members: vec![ - Member::new(0, "S0", vec!["B0".to_owned()], [], false), - Member::new(1, "S1", vec!["B1".to_owned()], [], false), - Member::new(2, "S2", vec!["B2".to_owned()], [], false), - Member::new(3, "S3", vec!["B3".to_owned()], [], false), - Member::new(4, "S4", vec!["B4".to_owned()], [], false), - ], - cluster_version: 1, - }; - Ok(tonic::Response::new(resp)) + conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(Some(3), 1, vec![0, 1, 2, 3, 4]) }); } 4 => { - conn.expect_fetch_cluster().returning(|_req, _timeout| { - let resp = FetchClusterResponse { - leader_id: Some(3.into()), // imagine this node is a old follower of old leader(3) - term: 1, // with the old term - cluster_id: 123, - members: vec![], - cluster_version: 1, - }; - Ok(tonic::Response::new(resp)) - }); + conn.expect_fetch_membership() + .returning(|_req, _timeout| build_membership_resp(Some(3), 1, vec![])); } _ => unreachable!("there are only 5 nodes"), }; diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 35eca32b8..befb0fad1 100644 --- 
a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -66,7 +66,8 @@ use crate::{ self, connect::{BypassedConnect, ConnectApi}, protocol_client::ProtocolClient, - FetchClusterRequest, FetchClusterResponse, ProposeId, Protocol, + FetchClusterRequest, FetchClusterResponse, FetchMembershipResponse, Node, ProposeId, + Protocol, }, server::StreamingProtocol, tracker::Tracker, @@ -304,19 +305,19 @@ impl ClientBuilder { &self, bypassed: Option<(u64, Arc)>, ) -> impl ConnectToCluster { - let is_raw_curp = self.is_raw_curp; + // TODO: distinguish peer urls / client urls let tls_config = self.tls_config.clone(); - move |resp: &FetchClusterResponse| -> HashMap> { - let members = if is_raw_curp { - resp.clone().into_peer_urls() - } else { - resp.clone().into_client_urls() - }; - members + move |resp: &FetchMembershipResponse| -> HashMap> { + resp.nodes + .clone() .into_iter() - .map(|(id, addrs)| (id, rpc::connect(id, addrs, tls_config.clone()))) + .map(|node| { + let Node { node_id, addr } = node; + let connect = rpc::connect(node_id, vec![addr], tls_config.clone()); + (node_id, connect) + }) .chain(bypassed.clone()) - .collect() + .collect::>() } } diff --git a/crates/curp/src/client/retry/mod.rs b/crates/curp/src/client/retry/mod.rs index 07221ad9c..1b44cd161 100644 --- a/crates/curp/src/client/retry/mod.rs +++ b/crates/curp/src/client/retry/mod.rs @@ -19,7 +19,7 @@ use super::{ }; use crate::{ members::ServerId, - rpc::{CurpError, FetchClusterResponse, ReadState, Redirect, ProposeId}, tracker::Tracker, + rpc::{CurpError, FetchClusterResponse, ReadState, Redirect, ProposeId, FetchMembershipResponse}, tracker::Tracker, }; /// Backoff config @@ -449,7 +449,7 @@ where async fn fetch_cluster( &self, linearizable: bool, - ) -> Result { + ) -> Result { self.retry::<_, _>(|client, ctx| async move { let (_, resp) = self.fetch.fetch_cluster(ctx.cluster_state()).await?; Ok(resp) From c476f4677d300c9461711812e8b7dc3e0f783977 Mon Sep 17 00:00:00 2001 From: bsbds 
<69835502+bsbds@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:02:44 +0800 Subject: [PATCH 162/322] chore: move retry/mod.rs to retry.rs Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/{retry/mod.rs => retry.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename crates/curp/src/client/{retry/mod.rs => retry.rs} (100%) diff --git a/crates/curp/src/client/retry/mod.rs b/crates/curp/src/client/retry.rs similarity index 100% rename from crates/curp/src/client/retry/mod.rs rename to crates/curp/src/client/retry.rs From 311cc81a394b84f49de929f20907e6339d03a4eb Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:17:20 +0800 Subject: [PATCH 163/322] refactor: update cluster state Removes the `cluster_version` from ClusterState Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 25 --------------- crates/curp/src/client/fetch.rs | 2 +- crates/curp/src/client/keep_alive.rs | 32 +++++++++++--------- crates/curp/src/client/tests.rs | 25 +++++++++------ crates/curp/src/client/unary/mod.rs | 14 ++++----- crates/curp/src/client/unary/propose_impl.rs | 2 +- crates/curp/src/member.rs | 6 ++++ 7 files changed, 48 insertions(+), 58 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index d17fc967f..237688fea 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -81,8 +81,6 @@ pub(crate) struct ClusterStateReady { leader: ServerId, /// Term, initialize to 0, calibrated by the server. term: u64, - /// Cluster version, initialize to 0, calibrated by the server. - cluster_version: u64, /// Members' connect, calibrated by the server. 
connects: HashMap>, } @@ -92,7 +90,6 @@ impl std::fmt::Debug for ClusterStateReady { f.debug_struct("State") .field("leader", &self.leader) .field("term", &self.term) - .field("cluster_version", &self.cluster_version) .field("connects", &self.connects.keys()) .finish() } @@ -110,22 +107,6 @@ impl ForEachServer for ClusterStateReady { impl ClusterStateReady { /// Creates a new `ClusterState` pub(crate) fn new( - leader: ServerId, - term: u64, - cluster_version: u64, - connects: HashMap>, - ) -> Self { - Self { - membership: Membership::default(), // FIXME: build initial membership config - leader, - term, - cluster_version, - connects, - } - } - - /// Creates a new `ClusterState` - pub(crate) fn new_membership( leader: ServerId, term: u64, connects: HashMap>, @@ -135,7 +116,6 @@ impl ClusterStateReady { membership, leader, term, - cluster_version: 0, connects, } } @@ -243,11 +223,6 @@ impl ClusterStateReady { self.term } - /// Returns the cluster version - pub(crate) fn cluster_version(&self) -> u64 { - self.cluster_version - } - /// Returns the leader id pub(crate) fn leader_id(&self) -> u64 { self.leader diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index b28c4d0ae..a5f43e259 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -84,7 +84,7 @@ impl Fetch { .await .ok_or(CurpError::internal("cluster not available"))?; let connects = (self.connect_to)(&resp); - let new_state = ClusterStateReady::new_membership( + let new_state = ClusterStateReady::new( resp.leader_id, resp.term, connects, diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 9955023e5..b6a35e493 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -144,7 +144,7 @@ impl KeepAlive { #[cfg(test)] mod tests { - use std::collections::HashMap; + use std::collections::{BTreeSet, HashMap}; use super::*; @@ -152,15 +152,18 @@ mod tests { use tonic::Status; use 
tracing_test::traced_test; - use crate::rpc::{ - connect::{ConnectApi, MockConnectApi}, - AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, CurpError, - FetchClusterRequest, FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, - FetchReadStateRequest, FetchReadStateResponse, Member, MoveLeaderRequest, - MoveLeaderResponse, OpResponse, ProposeId, ProposeRequest, ProposeResponse, PublishRequest, - PublishResponse, ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, - RemoveLearnerResponse, RemoveMemberRequest, RemoveMemberResponse, ResponseOp, - ShutdownRequest, ShutdownResponse, SyncedResponse, + use crate::{ + member::Membership, + rpc::{ + connect::{ConnectApi, MockConnectApi}, + AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, CurpError, + FetchClusterRequest, FetchClusterResponse, FetchMembershipRequest, + FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, Member, + MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeId, ProposeRequest, + ProposeResponse, PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, + RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, RemoveMemberRequest, + RemoveMemberResponse, ResponseOp, ShutdownRequest, ShutdownResponse, SyncedResponse, + }, }; struct MockedStreamConnectApi { @@ -350,13 +353,14 @@ mod tests { connects: HashMap>, leader: u64, term: u64, - cluster_version: u64, ) -> KeepAliveHandle { + let members = (0..5).collect::>(); + let nodes = members.iter().map(|id| (*id, format!("{id}"))).collect(); let state = ClusterState::Ready(ClusterStateReady::new( leader, term, - cluster_version, connects.clone(), + Membership::new(vec![members], nodes), )); let fetch = Fetch::new(Duration::from_secs(0), move |_| connects.clone()); let state_shared = ClusterStateShared::new_test(state, fetch); @@ -370,7 +374,7 @@ mod tests { async fn test_stream_client_keep_alive_works() { let connects = 
init_mocked_stream_connects(5, 0, 1, move |client_id| Box::pin(async move { Ok(10) })); - let mut keep_alive = init_stream_client(connects, 0, 1, 1); + let mut keep_alive = init_stream_client(connects, 0, 1); tokio::time::timeout(Duration::from_millis(100), &mut keep_alive.handle) .await .unwrap_err(); @@ -382,7 +386,7 @@ mod tests { async fn test_stream_client_keep_alive_on_redirect() { let connects = init_mocked_stream_connects(5, 0, 2, move |client_id| Box::pin(async move { Ok(10) })); - let mut keep_alive = init_stream_client(connects, 1, 1, 1); + let mut keep_alive = init_stream_client(connects, 1, 1); tokio::time::timeout(Duration::from_millis(100), &mut keep_alive.handle) .await .unwrap_err(); diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 20a51e57e..163feff62 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -1,5 +1,5 @@ use std::{ - collections::HashMap, + collections::{BTreeSet, HashMap}, sync::{Arc, Mutex}, time::{Duration, Instant}, }; @@ -21,6 +21,7 @@ use crate::{ retry::{Context, Retry, RetryConfig}, ClientApi, }, + member::Membership, members::ServerId, rpc::{ connect::{ConnectApi, MockConnectApi}, @@ -83,6 +84,12 @@ fn build_empty_response() -> OpResponse { OpResponse { op: None } } +fn build_default_membership() -> Membership { + let members = (0..5).collect::>(); + let nodes = members.iter().map(|id| (*id, format!("{id}"))).collect(); + Membership::new(vec![members], nodes) +} + #[traced_test] #[tokio::test] async fn test_unary_propose_fast_path_works() { @@ -108,7 +115,7 @@ async fn test_unary_propose_fast_path_works() { }); }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); let ctx = Context::new(ProposeId::default(), 0, cluster_state); let res = unary .propose(&TestCommand::new_put(vec![1], 1), None, true, ctx) @@ -144,7 
+151,7 @@ async fn test_unary_propose_slow_path_works() { }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); let ctx = Context::new(ProposeId::default(), 0, cluster_state); let start_at = Instant::now(); let res = unary @@ -190,7 +197,7 @@ async fn test_unary_propose_fast_path_fallback_slow_path() { }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); let ctx = Context::new(ProposeId::default(), 0, cluster_state); let start_at = Instant::now(); let res = unary @@ -238,7 +245,7 @@ async fn test_unary_propose_return_early_err() { }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); let ctx = Context::new(ProposeId::default(), 0, cluster_state); let err = unary .propose(&TestCommand::new_put(vec![1], 1), None, true, ctx) @@ -277,7 +284,7 @@ async fn test_retry_propose_return_no_retry_error() { }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(100), 5), @@ -330,7 +337,7 @@ async fn test_retry_propose_return_retry_error() { .returning(move |_req, _timeout| Err(err.clone())); }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(10), 5), @@ -372,7 +379,7 @@ async fn 
test_read_index_success() { }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); let ctx = Context::new(ProposeId::default(), 0, cluster_state); let res = unary .propose(&TestCommand::default(), None, true, ctx) @@ -407,7 +414,7 @@ async fn test_read_index_fail() { }); }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, 0, connects); + let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); let ctx = Context::new(ProposeId::default(), 0, cluster_state); let res = unary .propose(&TestCommand::default(), None, true, ctx) diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index bc4f08579..954d95773 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -66,7 +66,7 @@ impl RepeatableClientApi for Unary { /// Send propose to shutdown cluster async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error> { - let req = ShutdownRequest::new(ctx.propose_id(), ctx.cluster_state().cluster_version()); + let req = ShutdownRequest::new(ctx.propose_id(), 0); let timeout = self.config.wait_synced_timeout(); let _resp = ctx .cluster_state() @@ -96,7 +96,7 @@ impl RepeatableClientApi for Unary { /// Send move leader request async fn move_leader(&self, node_id: u64, ctx: Context) -> Result<(), Self::Error> { - let req = MoveLeaderRequest::new(node_id, ctx.cluster_state().cluster_version()); + let req = MoveLeaderRequest::new(node_id, 0); let timeout = self.config.wait_synced_timeout(); let _resp = ctx .cluster_state() @@ -114,12 +114,10 @@ impl RepeatableClientApi for Unary { ) -> Result { // Same as fast_round, we blame the serializing error to the server even // thought it is the local error - let req = FetchReadStateRequest::new(cmd, 
ctx.cluster_state().cluster_version()).map_err( - |ser_err| { - warn!("serializing error: {ser_err}"); - CurpError::from(ser_err) - }, - )?; + let req = FetchReadStateRequest::new(cmd, 0).map_err(|ser_err| { + warn!("serializing error: {ser_err}"); + CurpError::from(ser_err) + })?; let timeout = self.config.wait_synced_timeout(); let state = ctx .cluster_state() diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs index a9149e2dc..bdcf144b4 100644 --- a/crates/curp/src/client/unary/propose_impl.rs +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -139,7 +139,7 @@ impl Unary { let propose_req = ProposeRequest::new::( ctx.propose_id(), cmd, - ctx.cluster_state().cluster_version(), + 0, term, !use_fast_path, ctx.first_incomplete(), diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index aaa98f5ae..8315862de 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -219,6 +219,12 @@ pub(crate) struct Membership { } impl Membership { + #[cfg(test)] + /// Creates a new `Membership` + pub(crate) fn new(members: Vec>, nodes: BTreeMap) -> Self { + Self { members, nodes } + } + /// Generates a new membership from `Change` /// /// Returns `None` if the change is invalid From bee42d158de69c36ac9ba2ae632ded7d4ccea794 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 11 Sep 2024 20:51:12 +0800 Subject: [PATCH 164/322] refactor: remove `discover_from` in curp client Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 105 +++------------------- crates/curp/tests/it/common/curp_group.rs | 6 +- crates/curp/tests/it/server.rs | 2 +- crates/xline-client/src/lib.rs | 3 +- crates/xline/src/server/xline_server.rs | 2 +- 5 files changed, 18 insertions(+), 100 deletions(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index befb0fad1..0f0d0429a 100644 --- a/crates/curp/src/client/mod.rs 
+++ b/crates/curp/src/client/mod.rs @@ -40,17 +40,16 @@ pub use connect::ClientApi; #[cfg(madsim)] use std::sync::atomic::AtomicU64; -use std::{collections::HashMap, fmt::Debug, ops::Deref, sync::Arc, time::Duration}; +use std::{collections::HashMap, ops::Deref, sync::Arc}; use curp_external_api::cmd::Command; -use futures::{stream::FuturesUnordered, StreamExt}; use parking_lot::RwLock; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; -use tracing::{debug, warn}; +use tracing::debug; +use utils::config::ClientConfig; #[cfg(madsim)] use utils::ClientTlsConfig; -use utils::{build_endpoint, config::ClientConfig}; use self::{ cluster_state::{ClusterState, ClusterStateInit}, @@ -65,9 +64,7 @@ use crate::{ rpc::{ self, connect::{BypassedConnect, ConnectApi}, - protocol_client::ProtocolClient, - FetchClusterRequest, FetchClusterResponse, FetchMembershipResponse, Node, ProposeId, - Protocol, + FetchMembershipResponse, Node, ProposeId, Protocol, }, server::StreamingProtocol, tracker::Tracker, @@ -112,7 +109,7 @@ pub struct ClientBuilder { /// initial cluster version cluster_version: Option, /// initial cluster members - all_members: Option>>, + init_nodes: Option>>, /// is current client send request to raw curp server is_raw_curp: bool, /// initial leader state @@ -170,11 +167,11 @@ impl ClientBuilder { self } - /// Set the initial all members + /// Set the initial nodes #[inline] #[must_use] - pub fn all_members(mut self, all_members: HashMap>) -> Self { - self.all_members = Some(all_members); + pub fn init_nodes(mut self, nodes: impl IntoIterator>) -> Self { + self.init_nodes = Some(nodes.into_iter().collect()); self } @@ -194,85 +191,6 @@ impl ClientBuilder { self } - /// Discover the initial states from some endpoints - /// - /// # Errors - /// - /// Return `tonic::Status` for connection failure or some server errors. 
- #[inline] - pub async fn discover_from(mut self, addrs: Vec) -> Result { - /// Sleep duration in secs when the cluster is unavailable - const DISCOVER_SLEEP_DURATION: u64 = 1; - loop { - match self.try_discover_from(&addrs).await { - Ok(()) => return Ok(self), - Err(e) if matches!(e.code(), tonic::Code::Unavailable) => { - warn!("cluster is unavailable, sleep for {DISCOVER_SLEEP_DURATION} secs"); - tokio::time::sleep(Duration::from_secs(DISCOVER_SLEEP_DURATION)).await; - } - Err(e) => return Err(e), - } - } - } - - /// Discover the initial states from some endpoints - /// - /// # Errors - /// - /// Return `tonic::Status` for connection failure or some server errors. - #[inline] - pub async fn try_discover_from(&mut self, addrs: &[String]) -> Result<(), tonic::Status> { - let propose_timeout = *self.config.propose_timeout(); - let mut futs: FuturesUnordered<_> = addrs - .iter() - .map(|addr| { - let tls_config = self.tls_config.clone(); - async move { - let endpoint = build_endpoint(addr, tls_config.as_ref()).map_err(|e| { - tonic::Status::internal(format!("create endpoint failed, error: {e}")) - })?; - let channel = endpoint.connect().await.map_err(|e| { - tonic::Status::cancelled(format!("cannot connect to addr, error: {e}")) - })?; - let mut protocol_client = ProtocolClient::new(channel); - let mut req = tonic::Request::new(FetchClusterRequest::default()); - req.set_timeout(propose_timeout); - let fetch_cluster_res = protocol_client.fetch_cluster(req).await?.into_inner(); - Ok::(fetch_cluster_res) - } - }) - .collect(); - let mut err = tonic::Status::invalid_argument("addrs is empty"); - // find the first one return `FetchClusterResponse` - while let Some(r) = futs.next().await { - match r { - Ok(r) => { - self.cluster_version = Some(r.cluster_version); - if let Some(ref id) = r.leader_id { - self.leader_state = Some((id.into(), r.term)); - } - self.all_members = if self.is_raw_curp { - Some(r.into_peer_urls()) - } else { - 
Some(Self::ensure_no_empty_address(r.into_client_urls())?) - }; - return Ok(()); - } - Err(e) => err = e, - } - } - Err(err) - } - - /// Ensures that no server has an empty list of addresses. - fn ensure_no_empty_address( - urls: HashMap>, - ) -> Result>, tonic::Status> { - (!urls.values().any(Vec::is_empty)) - .then_some(urls) - .ok_or(tonic::Status::unavailable("cluster not published")) - } - /// Init retry config fn init_retry_config(&self) -> RetryConfig { if *self.config.fixed_backoff() { @@ -324,9 +242,12 @@ impl ClientBuilder { /// Connect to members fn connect_members(&self, tls_config: Option<&ClientTlsConfig>) -> ClusterStateInit { let all_members = self - .all_members + .init_nodes .clone() - .unwrap_or_else(|| unreachable!("requires members")); + .unwrap_or_else(|| unreachable!("requires members")) + .into_iter() + .map(|addrs| (0, addrs)) + .collect(); let connects = rpc::connects(all_members, tls_config) .map(|(_id, conn)| conn) .collect(); diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index a3b66b2d2..1ce20dde5 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -344,11 +344,9 @@ impl CurpGroup { } pub async fn new_client(&self) -> impl ClientApi { - let addrs = self.all_addrs().cloned().collect(); + let addrs: Vec> = self.all_addrs().cloned().map(|addr| vec![addr]).collect(); ClientBuilder::new(ClientConfig::default(), true) - .discover_from(addrs) - .await - .unwrap() + .init_nodes(addrs) .build() .unwrap() } diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index 97aad53d0..6019e5260 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -437,7 +437,7 @@ async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_leader() // build a client and set a wrong leader id let client = ClientBuilder::new(ClientConfig::default(), true) .leader_state(follower_id, 0) - 
.all_members(group.all_addrs_map()) + .init_nodes(group.all_addrs_map().values().cloned()) .build::() .unwrap(); client.propose_shutdown().await.unwrap(); diff --git a/crates/xline-client/src/lib.rs b/crates/xline-client/src/lib.rs index 7f81e4acb..c34051134 100644 --- a/crates/xline-client/src/lib.rs +++ b/crates/xline-client/src/lib.rs @@ -244,8 +244,7 @@ impl Client { let curp_client = Arc::new( CurpClientBuilder::new(options.client_config, false) .tls_config(options.tls_config) - .discover_from(addrs) - .await? + .init_nodes(addrs.into_iter().map(|addr| vec![addr])) .build::()?, ) as Arc; let id_gen = Arc::new(lease_gen::LeaseIdGenerator::new()); diff --git a/crates/xline/src/server/xline_server.rs b/crates/xline/src/server/xline_server.rs index b36fbd4ac..280c586d0 100644 --- a/crates/xline/src/server/xline_server.rs +++ b/crates/xline/src/server/xline_server.rs @@ -534,7 +534,7 @@ impl XlineServer { CurpClientBuilder::new(*self.cluster_config.client_config(), true) .tls_config(self.client_tls_config.clone()) .cluster_version(self.cluster_info.cluster_version()) - .all_members(self.cluster_info.all_members_peer_urls()) + .init_nodes(self.cluster_info.all_members_peer_urls().values().cloned()) .bypass(self.cluster_info.self_id(), curp_server.clone()) .build::()?, ) as Arc; From 17e76630b3cebd889ed3d9ae80a790d2d69c1718 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 11 Sep 2024 20:30:54 +0800 Subject: [PATCH 165/322] refactor: remove fetch cluster Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/common | 2 +- crates/curp/src/client/keep_alive.rs | 26 ++----- crates/curp/src/client/retry.rs | 2 +- crates/curp/src/client/tests.rs | 46 ++++++++---- crates/curp/src/members.rs | 87 +---------------------- crates/curp/src/rpc/connect/mod.rs | 41 ++--------- crates/curp/src/rpc/mod.rs | 39 +--------- crates/curp/src/rpc/reconnect.rs | 20 ++---- crates/curp/src/server/curp_node/mod.rs | 42 
+++-------- crates/curp/src/server/mod.rs | 12 ---- crates/curp/tests/it/common/curp_group.rs | 45 +++--------- crates/xline/src/server/auth_wrapper.rs | 15 ++-- crates/xline/src/server/xline_server.rs | 35 +++------ 13 files changed, 85 insertions(+), 327 deletions(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 1e485f7b5..90e2b55d8 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 1e485f7b531b75f24423b8746dc2dee24e83899d +Subproject commit 90e2b55d881b40cf28b2f69ae0e1424bbdc89416 diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index b6a35e493..60aab646b 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -157,12 +157,12 @@ mod tests { rpc::{ connect::{ConnectApi, MockConnectApi}, AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, CurpError, - FetchClusterRequest, FetchClusterResponse, FetchMembershipRequest, - FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, Member, - MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeId, ProposeRequest, - ProposeResponse, PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, - RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, RemoveMemberRequest, - RemoveMemberResponse, ResponseOp, ShutdownRequest, ShutdownResponse, SyncedResponse, + FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, + FetchReadStateResponse, Member, MoveLeaderRequest, MoveLeaderResponse, OpResponse, + ProposeId, ProposeRequest, ProposeResponse, PublishRequest, PublishResponse, + ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, + RemoveLearnerResponse, RemoveMemberRequest, RemoveMemberResponse, ResponseOp, + ShutdownRequest, ShutdownResponse, SyncedResponse, }, }; @@ -232,20 +232,6 @@ mod tests { unreachable!("please use MockedConnectApi") } - /// Send `FetchClusterRequest` - async 
fn fetch_cluster( - &self, - _request: FetchClusterRequest, - _timeout: Duration, - ) -> Result, CurpError> { - let members = (0..5) - .into_iter() - .map(|id| Member::new(id, format!("{id}"), vec![], vec![], false)) - .collect(); - let resp = FetchClusterResponse::new(Some(0), 1, 1, members, 1); - Ok(tonic::Response::new(resp)) - } - /// Send `FetchReadStateRequest` async fn fetch_read_state( &self, diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 1b44cd161..1d0469527 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -19,7 +19,7 @@ use super::{ }; use crate::{ members::ServerId, - rpc::{CurpError, FetchClusterResponse, ReadState, Redirect, ProposeId, FetchMembershipResponse}, tracker::Tracker, + rpc::{CurpError, ReadState, Redirect, ProposeId, FetchMembershipResponse}, tracker::Tracker, }; /// Backoff config diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 163feff62..3bcbaf5ea 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -24,8 +24,9 @@ use crate::{ member::Membership, members::ServerId, rpc::{ + self, connect::{ConnectApi, MockConnectApi}, - CurpError, FetchClusterResponse, Member, OpResponse, ProposeId, ProposeResponse, + CurpError, FetchMembershipResponse, Node, OpResponse, ProposeId, ProposeResponse, ReadIndexResponse, RecordResponse, ResponseOp, SyncedResponse, }, }; @@ -90,6 +91,33 @@ fn build_default_membership() -> Membership { Membership::new(vec![members], nodes) } +fn build_membership_resp( + leader_id: Option, + term: u64, + members: impl IntoIterator, +) -> Result, CurpError> { + let leader_id = leader_id.ok_or(CurpError::leader_transfer("no current leader"))?; + + let members: Vec<_> = members.into_iter().collect(); + let nodes: Vec = members + .clone() + .into_iter() + .map(|node_id| Node { + node_id, + addr: String::new(), + }) + .collect(); + let qs = rpc::QuorumSet { set: members }; + + let resp = 
FetchMembershipResponse { + members: vec![qs], + nodes, + term, + leader_id, + }; + Ok(tonic::Response::new(resp)) +} + #[traced_test] #[tokio::test] async fn test_unary_propose_fast_path_works() { @@ -309,21 +337,9 @@ async fn test_retry_propose_return_retry_error() { CurpError::internal("No reason"), ] { let connects = init_mocked_connects(5, |id, conn| { - conn.expect_fetch_cluster() + conn.expect_fetch_membership() .returning(move |_req, _timeout| { - Ok(tonic::Response::new(FetchClusterResponse { - leader_id: Some(0.into()), - term: 2, - cluster_id: 123, - members: vec![ - Member::new(0, "S0", vec!["A0".to_owned()], [], false), - Member::new(1, "S1", vec!["A1".to_owned()], [], false), - Member::new(2, "S2", vec!["A2".to_owned()], [], false), - Member::new(3, "S3", vec!["A3".to_owned()], [], false), - Member::new(4, "S4", vec!["A4".to_owned()], [], false), - ], - cluster_version: 1, - })) + build_membership_resp(Some(0), 2, vec![0, 1, 2, 3, 4]) }); if id == 0 { let err = early_err.clone(); diff --git a/crates/curp/src/members.rs b/crates/curp/src/members.rs index 3658cad1d..619b68fdd 100644 --- a/crates/curp/src/members.rs +++ b/crates/curp/src/members.rs @@ -5,19 +5,16 @@ use std::{ atomic::{AtomicU64, Ordering}, Arc, }, - time::Duration, }; use dashmap::{mapref::one::Ref, DashMap}; -use futures::{stream::FuturesUnordered, StreamExt}; use itertools::Itertools; #[cfg(not(madsim))] -use tonic::transport::ClientTlsConfig; -use tracing::{debug, info}; +use tracing::debug; #[cfg(madsim)] use utils::ClientTlsConfig; -use crate::rpc::{self, FetchClusterRequest, FetchClusterResponse, Member}; +use crate::rpc::Member; /// Server Id pub type ServerId = u64; @@ -132,47 +129,6 @@ impl ClusterInfo { cluster_info } - /// Construct a new `ClusterInfo` from `FetchClusterResponse` - /// - /// # Panics - /// - /// panic if `cluster.members` doesn't contain `self_addr` - #[inline] - #[must_use] - pub fn from_cluster( - cluster: FetchClusterResponse, - self_peer_urls: &[String], 
- self_client_urls: &[String], - self_name: &str, - ) -> Self { - let mut member_id = 0; - let sorted_self_addr = self_peer_urls.iter().sorted(); - let members = cluster - .members - .into_iter() - .map(|mut member| { - if member - .peer_urls() - .iter() - .sorted() - .eq(sorted_self_addr.clone()) - { - member_id = member.id; - member.name = self_name.to_owned(); - member.client_urls = self_client_urls.to_vec(); - } - (member.id, member) - }) - .collect(); - assert!(member_id != 0, "self_id should not be 0"); - Self { - cluster_id: cluster.cluster_id, - member_id, - members, - cluster_version: Arc::new(AtomicU64::new(cluster.cluster_version)), - } - } - /// Get all members #[must_use] #[inline] @@ -395,45 +351,6 @@ impl ClusterInfo { } } -/// Get cluster info from remote servers -#[inline] -pub async fn get_cluster_info_from_remote( - init_cluster_info: &ClusterInfo, - self_peer_urls: &[String], - self_name: &str, - timeout: Duration, - tls_config: Option<&ClientTlsConfig>, -) -> Option { - let peers = init_cluster_info.peers_addrs(); - let self_client_urls = init_cluster_info.self_client_urls(); - let connects = rpc::connects(peers, tls_config) - .map(|pair| pair.1) - .collect_vec(); - let mut futs = connects - .iter() - .map(|c| { - c.fetch_cluster( - FetchClusterRequest { - linearizable: false, - }, - timeout, - ) - }) - .collect::>(); - while let Some(result) = futs.next().await { - if let Ok(cluster_res) = result { - info!("get cluster info from remote success: {:?}", cluster_res); - return Some(ClusterInfo::from_cluster( - cluster_res.into_inner(), - self_peer_urls, - self_client_urls.as_slice(), - self_name, - )); - } - } - None -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index 7d88781b2..f7f7f288a 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -38,12 +38,11 @@ use crate::{ 
inner_messagepb::inner_protocol_client::InnerProtocolClient, }, AddMemberRequest, AddMemberResponse, AppendEntriesRequest, AppendEntriesResponse, - CurpError, FetchClusterRequest, FetchClusterResponse, FetchReadStateRequest, - FetchReadStateResponse, InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, - MoveLeaderRequest, MoveLeaderResponse, ProposeRequest, Protocol, PublishRequest, - PublishResponse, RemoveMemberRequest, RemoveMemberResponse, ShutdownRequest, - ShutdownResponse, TriggerShutdownRequest, TryBecomeLeaderNowRequest, VoteRequest, - VoteResponse, + CurpError, FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotRequest, + InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, + ProposeRequest, Protocol, PublishRequest, PublishResponse, RemoveMemberRequest, + RemoveMemberResponse, ShutdownRequest, ShutdownResponse, TriggerShutdownRequest, + TryBecomeLeaderNowRequest, VoteRequest, VoteResponse, }, server::StreamingProtocol, snapshot::Snapshot, @@ -208,13 +207,6 @@ pub(crate) trait ConnectApi: Send + Sync + 'static { timeout: Duration, ) -> Result, CurpError>; - /// Send `FetchClusterRequest` - async fn fetch_cluster( - &self, - request: FetchClusterRequest, - timeout: Duration, - ) -> Result, CurpError>; - /// Send `FetchReadStateRequest` async fn fetch_read_state( &self, @@ -500,17 +492,6 @@ impl ConnectApi for Connect> { with_timeout!(timeout, client.publish(req)).map_err(Into::into) } - /// Send `FetchClusterRequest` - async fn fetch_cluster( - &self, - request: FetchClusterRequest, - timeout: Duration, - ) -> Result, CurpError> { - let mut client = self.rpc_connect.clone(); - let req = tonic::Request::new(request); - with_timeout!(timeout, client.fetch_cluster(req)).map_err(Into::into) - } - /// Send `FetchReadStateRequest` async fn fetch_read_state( &self, @@ -833,18 +814,6 @@ where self.server.shutdown(req).await.map_err(Into::into) } - /// Send `FetchClusterRequest` - async fn fetch_cluster( - 
&self, - request: FetchClusterRequest, - _timeout: Duration, - ) -> Result, CurpError> { - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_bypassed(); - req.metadata_mut().inject_current(); - self.server.fetch_cluster(req).await.map_err(Into::into) - } - /// Send `FetchReadStateRequest` async fn fetch_read_state( &self, diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 684bfa99d..d7c51709a 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, sync::Arc}; +use std::sync::Arc; use curp_external_api::{ cmd::{ConflictCheck, PbCodec, PbSerializeError}, @@ -31,8 +31,6 @@ pub use self::proto::{ AddMemberRequest, AddMemberResponse, CmdResult, - FetchClusterRequest, - FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, @@ -142,41 +140,6 @@ impl From<&OptionalU64> for u64 { } } -impl FetchClusterResponse { - /// Create a new `FetchClusterResponse` - pub(crate) fn new( - leader_id: Option, - term: u64, - cluster_id: u64, - members: Vec, - cluster_version: u64, - ) -> Self { - Self { - leader_id: leader_id.map(Into::into), - term, - cluster_id, - members, - cluster_version, - } - } - - /// Get all members peer urls - pub(crate) fn into_peer_urls(self) -> HashMap> { - self.members - .into_iter() - .map(|member| (member.id, member.peer_urls)) - .collect() - } - - /// Get all members peer urls - pub(crate) fn into_client_urls(self) -> HashMap> { - self.members - .into_iter() - .map(|member| (member.id, member.client_urls)) - .collect() - } -} - impl ProposeRequest { /// Create a new `Propose` request #[inline] diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs index fb2cc713c..09de8fc24 100644 --- a/crates/curp/src/rpc/reconnect.rs +++ b/crates/curp/src/rpc/reconnect.rs @@ -8,12 +8,11 @@ use crate::{ members::ServerId, rpc::{ connect::ConnectApi, AddLearnerRequest, AddLearnerResponse, 
AddMemberRequest, - AddMemberResponse, CurpError, FetchClusterRequest, FetchClusterResponse, - FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, - FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeRequest, - PublishRequest, PublishResponse, ReadIndexResponse, RecordRequest, RecordResponse, - RemoveLearnerRequest, RemoveLearnerResponse, RemoveMemberRequest, RemoveMemberResponse, - ShutdownRequest, ShutdownResponse, + AddMemberResponse, CurpError, FetchMembershipRequest, FetchMembershipResponse, + FetchReadStateRequest, FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, + OpResponse, ProposeRequest, PublishRequest, PublishResponse, ReadIndexResponse, + RecordRequest, RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, + RemoveMemberRequest, RemoveMemberResponse, ShutdownRequest, ShutdownResponse, }, }; @@ -138,15 +137,6 @@ impl ConnectApi for Reconnect { execute_with_reconnect!(self, ConnectApi::shutdown, request, timeout) } - /// Send `FetchClusterRequest` - async fn fetch_cluster( - &self, - request: FetchClusterRequest, - timeout: Duration, - ) -> Result, CurpError> { - execute_with_reconnect!(self, ConnectApi::fetch_cluster, request, timeout) - } - /// Send `FetchReadStateRequest` async fn fetch_read_state( &self, diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 4628ddc9e..e5b8ea954 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -47,14 +47,14 @@ use crate::{ rpc::{ self, connect::{InnerConnectApi, InnerConnectApiWrapper}, - AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchClusterRequest, - FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, - FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotRequest, - InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, Node, - PoolEntry, ProposeId, ProposeRequest, 
ProposeResponse, PublishRequest, PublishResponse, - QuorumSet, ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, - ShutdownResponse, SyncedResponse, TriggerShutdownRequest, TriggerShutdownResponse, - TryBecomeLeaderNowRequest, TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, + AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchMembershipRequest, + FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, + InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, + MoveLeaderResponse, Node, PoolEntry, ProposeId, ProposeRequest, ProposeResponse, + PublishRequest, PublishResponse, QuorumSet, ReadIndexResponse, RecordRequest, + RecordResponse, ShutdownRequest, ShutdownResponse, SyncedResponse, TriggerShutdownRequest, + TriggerShutdownResponse, TryBecomeLeaderNowRequest, TryBecomeLeaderNowResponse, + VoteRequest, VoteResponse, }, server::{ cmd_worker::{after_sync, worker_reset, worker_snapshot}, @@ -499,32 +499,6 @@ impl, RC: RoleChange> CurpNode { TriggerShutdownResponse::default() } - /// Handle `FetchCluster` requests - #[allow(clippy::unnecessary_wraps, clippy::needless_pass_by_value)] // To keep type consistent with other request handlers - pub(super) fn fetch_cluster( - &self, - req: FetchClusterRequest, - ) -> Result { - let (leader_id, term, is_leader) = self.curp.leader(); - let cluster_id = self.curp.cluster().cluster_id(); - let members = if is_leader || !req.linearizable { - self.curp.cluster().all_members_vec() - } else { - // if it is a follower and enabled linearizable read, return empty members - // the client will ignore empty members and retry util it gets response from - // the leader - Vec::new() - }; - let cluster_version = self.curp.cluster().cluster_version(); - Ok(FetchClusterResponse::new( - leader_id, - term, - cluster_id, - members, - cluster_version, - )) - } - /// Handle `InstallSnapshot` stream #[allow(clippy::arithmetic_side_effects)] // can't overflow pub(super) 
async fn install_snapshot( diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 78d4a6cfa..faae437f4 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -30,8 +30,6 @@ use crate::rpc::AddLearnerRequest; use crate::rpc::AddLearnerResponse; use crate::rpc::AppendEntriesRequest; use crate::rpc::AppendEntriesResponse; -use crate::rpc::FetchClusterRequest; -use crate::rpc::FetchClusterResponse; use crate::rpc::FetchMembershipRequest; use crate::rpc::FetchMembershipResponse; use crate::rpc::FetchReadStateRequest; @@ -171,16 +169,6 @@ impl, RC: RoleChange> crate::rpc::Protocol fo )) } - #[instrument(skip_all, name = "curp_fetch_cluster")] - async fn fetch_cluster( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - Ok(tonic::Response::new( - self.inner.fetch_cluster(request.into_inner())?, - )) - } - #[instrument(skip_all, name = "curp_fetch_read_state")] async fn fetch_read_state( &self, diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index 1ce20dde5..b904f2691 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -56,10 +56,11 @@ pub mod commandpb { } pub use commandpb::{ - protocol_client::ProtocolClient, FetchClusterRequest, FetchClusterResponse, ProposeRequest, - ProposeResponse, + protocol_client::ProtocolClient, FetchMembershipResponse, ProposeRequest, ProposeResponse, }; +use self::commandpb::FetchMembershipRequest; + /// `BOTTOM_TASKS` are tasks which not dependent on other tasks in the task group. /// `CurpGroup` uses `BOTTOM_TASKS` to detect whether the curp group is closed or not. const BOTTOM_TASKS: [TaskName; 2] = [TaskName::WatchTask, TaskName::ConfChange]; @@ -444,21 +445,21 @@ impl CurpGroup { Err(e) => continue, }; - let FetchClusterResponse { + let FetchMembershipResponse { leader_id, term, .. 
- } = if let Ok(resp) = client.fetch_cluster(FetchClusterRequest::default()).await { + } = if let Ok(resp) = client.fetch_membership(FetchMembershipRequest {}).await { resp.into_inner() } else { continue; }; if term > max_term { max_term = term; - leader = leader_id; + leader = Some(leader_id); } else if term == max_term && leader.is_none() { - leader = leader_id; + leader = Some(leader_id); } } - leader.map(|l| (l.value, max_term)) + leader.map(|l| (l, max_term)) } pub async fn get_leader(&self) -> (ServerId, u64) { @@ -485,9 +486,9 @@ impl CurpGroup { Err(e) => continue, }; - let FetchClusterResponse { + let FetchMembershipResponse { leader_id, term, .. - } = if let Ok(resp) = client.fetch_cluster(FetchClusterRequest::default()).await { + } = if let Ok(resp) = client.fetch_membership(FetchMembershipRequest {}).await { resp.into_inner() } else { continue; @@ -512,32 +513,6 @@ impl CurpGroup { let channel = channel_fut.await.unwrap(); ProtocolClient::new(channel) } - - pub async fn fetch_cluster_info(&self, addrs: &[String], name: &str) -> ClusterInfo { - let leader_id = self.get_leader().await.0; - let mut connect = self.get_connect(&leader_id).await; - let client_urls: Vec = vec![]; - let cluster_res_base = connect - .fetch_cluster(tonic::Request::new(FetchClusterRequest { - linearizable: false, - })) - .await - .unwrap() - .into_inner(); - let members = cluster_res_base - .members - .into_iter() - .map(|m| Member::new(m.id, m.name, m.peer_urls, m.client_urls, m.is_learner)) - .collect(); - let cluster_res = curp::rpc::FetchClusterResponse { - leader_id: cluster_res_base.leader_id.map(|l| l.value.into()), - term: cluster_res_base.term, - cluster_id: cluster_res_base.cluster_id, - members, - cluster_version: cluster_res_base.cluster_version, - }; - ClusterInfo::from_cluster(cluster_res, addrs, client_urls.as_slice(), name) - } } impl Drop for CurpGroup { diff --git a/crates/xline/src/server/auth_wrapper.rs b/crates/xline/src/server/auth_wrapper.rs index 
37b4b25b4..87a14fc56 100644 --- a/crates/xline/src/server/auth_wrapper.rs +++ b/crates/xline/src/server/auth_wrapper.rs @@ -4,10 +4,10 @@ use curp::{ cmd::PbCodec, rpc::{ AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, - FetchClusterRequest, FetchClusterResponse, FetchMembershipRequest, FetchMembershipResponse, - FetchReadStateRequest, FetchReadStateResponse, LeaseKeepAliveMsg, MoveLeaderRequest, - MoveLeaderResponse, OpResponse, ProposeRequest, Protocol, PublishRequest, PublishResponse, - ReadIndexRequest, ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, + FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, + FetchReadStateResponse, LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, + OpResponse, ProposeRequest, Protocol, PublishRequest, PublishResponse, ReadIndexRequest, + ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, RemoveMemberRequest, RemoveMemberResponse, ShutdownRequest, ShutdownResponse, }, @@ -89,13 +89,6 @@ impl Protocol for AuthWrapper { self.curp_server.publish(request).await } - async fn fetch_cluster( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - self.curp_server.fetch_cluster(request).await - } - async fn fetch_read_state( &self, request: tonic::Request, diff --git a/crates/xline/src/server/xline_server.rs b/crates/xline/src/server/xline_server.rs index 280c586d0..ff22732fe 100644 --- a/crates/xline/src/server/xline_server.rs +++ b/crates/xline/src/server/xline_server.rs @@ -5,7 +5,7 @@ use clippy_utilities::{NumericCast, OverflowArithmetic}; use curp::{ client::ClientBuilder as CurpClientBuilder, member::MembershipInfo, - members::{get_cluster_info_from_remote, ClusterInfo}, + members::ClusterInfo, rpc::{InnerProtocolServer, ProtocolServer}, server::{Rpc, StorageApi as _, DB as CurpDB}, }; @@ -116,14 +116,11 @@ impl XlineServer { #[cfg(madsim)] let (client_tls_config, server_tls_config) = (None, 
None); let curp_storage = Arc::new(CurpDB::open(&cluster_config.curp_config().engine_cfg)?); - let cluster_info = Arc::new( - Self::init_cluster_info( - &cluster_config, - curp_storage.as_ref(), - client_tls_config.as_ref(), - ) - .await?, - ); + let cluster_info = Arc::new(Self::init_cluster_info( + &cluster_config, + curp_storage.as_ref(), + client_tls_config.as_ref(), + )?); let membership_info = MembershipInfo::new( *cluster_config.node_id(), cluster_config.initial_membership_info().clone(), @@ -143,11 +140,12 @@ impl XlineServer { }) } + #[allow(clippy::todo)] /// Init cluster info from cluster config - async fn init_cluster_info( + fn init_cluster_info( cluster_config: &ClusterConfig, curp_storage: &CurpDB, - tls_config: Option<&ClientTlsConfig>, + _tls_config: Option<&ClientTlsConfig>, ) -> Result { info!("name = {:?}", cluster_config.name()); info!("cluster_peers = {:?}", cluster_config.peers()); @@ -155,7 +153,6 @@ impl XlineServer { let name = cluster_config.name().clone(); let all_members = cluster_config.peers().clone(); let self_client_urls = cluster_config.client_advertise_urls().clone(); - let self_peer_urls = cluster_config.peer_advertise_urls().clone(); match ( curp_storage.recover_cluster_info()?, *cluster_config.initial_cluster_state(), @@ -172,18 +169,8 @@ impl XlineServer { Ok(cluster_info) } (None, InitialClusterState::Existing) => { - info!("get cluster_info from remote"); - let cluster_info = get_cluster_info_from_remote( - &ClusterInfo::from_members_map(all_members, self_client_urls, &name), - &self_peer_urls, - cluster_config.name(), - *cluster_config.client_config().wait_synced_timeout(), - tls_config, - ) - .await - .ok_or_else(|| anyhow!("Failed to get cluster info from remote"))?; - curp_storage.put_cluster_info(&cluster_info)?; - Ok(cluster_info) + // FIXME + todo!("adding a new member to the cluster"); } (None, _) => { unreachable!("xline only supports two initial cluster states: new, existing") From 
a7201a917d897be960116587e293e23b29c9f42f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:28:14 +0800 Subject: [PATCH 166/322] feat: Add cluster id generation for Membership Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 42 +++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 8315862de..700b61326 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -1,8 +1,10 @@ use std::collections::btree_map::Entry; +use std::collections::hash_map::DefaultHasher; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashSet; use std::hash::Hash; +use std::hash::Hasher; use std::iter; use std::sync::Arc; @@ -35,6 +37,16 @@ impl MembershipInfo { init_members, } } + + /// Converts `MembershipInfo` into a `Membership`. + pub(crate) fn into_membership(self) -> Membership { + let MembershipInfo { init_members, .. } = self; + + Membership { + nodes: init_members.clone(), + members: vec![init_members.into_keys().collect()], + } + } } /// The membership state of the node @@ -56,14 +68,8 @@ impl NodeMembershipState { info: MembershipInfo, init_connects: BTreeMap, ) -> Self { - let MembershipInfo { - node_id, - init_members, - } = info; - let init_ms = Membership { - members: vec![init_members.keys().copied().collect()], - nodes: init_members, - }; + let node_id = info.node_id; + let init_ms = info.into_membership(); let cluster_state = MembershipState { effective: init_ms, index_effective: 1, @@ -361,3 +367,23 @@ pub(crate) enum Change { /// Removes members RemoveMember(Vec), } + +/// Trait for types that can provide a cluster ID. +trait ClusterId { + /// Returns the cluster ID. 
+ fn cluster_id(&self) -> u64; +} + +impl ClusterId for Membership { + fn cluster_id(&self) -> u64 { + let mut hasher = DefaultHasher::new(); + self.hash(&mut hasher); + hasher.finish() + } +} + +impl ClusterId for MembershipInfo { + fn cluster_id(&self) -> u64 { + self.clone().into_membership().cluster_id() + } +} From 79fa759e3be7a74ab85ba958f4d8014f23dd763c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 12 Sep 2024 16:30:07 +0800 Subject: [PATCH 167/322] refactor: remove ClusterInfo and related code Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 8 +- crates/curp/src/members.rs | 18 ---- crates/curp/src/server/cmd_worker/mod.rs | 11 +-- crates/curp/src/server/curp_node/mod.rs | 48 +++------- crates/curp/src/server/metrics.rs | 4 +- crates/curp/src/server/mod.rs | 3 - crates/curp/src/server/raw_curp/mod.rs | 44 +++++---- crates/curp/src/server/raw_curp/tests.rs | 73 +++++++------- crates/curp/tests/it/common/curp_group.rs | 2 - crates/xline/src/server/lease_server.rs | 24 +---- crates/xline/src/server/maintenance.rs | 9 +- crates/xline/src/server/xline_server.rs | 110 ++++++---------------- 12 files changed, 117 insertions(+), 237 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 700b61326..aa89141f9 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -102,6 +102,11 @@ impl NodeMembershipState { self.cluster().effective().contains(self.node_id()) } + /// Returns `true` if the given node is a member of the cluster + pub(crate) fn check_membership(&self, node_id: u64) -> bool { + self.cluster().effective().contains(node_id) + } + /// Updates the connects /// /// Returns a pair of (removed, added) connects @@ -369,7 +374,7 @@ pub(crate) enum Change { } /// Trait for types that can provide a cluster ID. -trait ClusterId { +pub trait ClusterId { /// Returns the cluster ID. 
fn cluster_id(&self) -> u64; } @@ -383,6 +388,7 @@ impl ClusterId for Membership { } impl ClusterId for MembershipInfo { + #[inline] fn cluster_id(&self) -> u64 { self.clone().into_membership().cluster_id() } diff --git a/crates/curp/src/members.rs b/crates/curp/src/members.rs index 619b68fdd..2a15c966c 100644 --- a/crates/curp/src/members.rs +++ b/crates/curp/src/members.rs @@ -10,7 +10,6 @@ use std::{ use dashmap::{mapref::one::Ref, DashMap}; use itertools::Itertools; #[cfg(not(madsim))] -use tracing::debug; #[cfg(madsim)] use utils::ClientTlsConfig; @@ -332,23 +331,6 @@ impl ClusterInfo { .iter() .find_map(|m| (m.name == name).then_some(m.id)) } - - /// Check if cluster contains a node - pub(crate) fn contains(&self, node_id: ServerId) -> bool { - self.members.contains_key(&node_id) - } - - /// Set state for a node - pub(crate) fn set_node_state(&self, node_id: ServerId, name: String, client_urls: Vec) { - if let Some(mut s) = self.members.get_mut(&node_id) { - debug!( - "set name and client_urls for node {} to {},{:?}", - node_id, name, client_urls - ); - s.name = name; - s.client_urls = client_urls; - } - } } #[cfg(test)] diff --git a/crates/curp/src/server/cmd_worker/mod.rs b/crates/curp/src/server/cmd_worker/mod.rs index 619b44194..f271e9e24 100644 --- a/crates/curp/src/server/cmd_worker/mod.rs +++ b/crates/curp/src/server/cmd_worker/mod.rs @@ -165,14 +165,9 @@ fn after_sync_others, RC: RoleChange>( } cb.write().notify_shutdown(); } - (EntryData::SetNodeState(node_id, ref name, ref client_urls), _) => { - info!("setting node state: {node_id}, urls: {:?}", client_urls); - if let Err(e) = ce.set_last_applied(entry.index) { - error!("failed to set last_applied, {e}"); - return; - } - curp.cluster() - .set_node_state(*node_id, name.clone(), client_urls.clone()); + #[allow(clippy::todo)] // FIXME: Remove this + (EntryData::SetNodeState(..), _) => { + todo!() } // The no-op command has been applied to state machine (EntryData::Empty, _) => 
curp.set_no_op_applied(), diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index e5b8ea954..7e2e894d6 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -41,7 +41,7 @@ use crate::{ cmd::{Command, CommandExecutor}, log_entry::{EntryData, LogEntry}, member::{Membership, MembershipInfo}, - members::{ClusterInfo, ServerId}, + members::ServerId, response::ResponseSender, role_change::RoleChange, rpc::{ @@ -168,7 +168,6 @@ impl, RC: RoleChange> CurpNode { return Err(CurpError::shutting_down()); } self.curp.check_leader_transfer()?; - self.check_cluster_version(req.cluster_version)?; self.curp.check_term(req.term)?; if req.slow_path { @@ -338,7 +337,6 @@ impl, RC: RoleChange> CurpNode { req: ShutdownRequest, bypassed: bool, ) -> Result { - self.check_cluster_version(req.cluster_version)?; if bypassed { self.curp.mark_client_id_bypassed(req.propose_id().0); } @@ -572,7 +570,6 @@ impl, RC: RoleChange> CurpNode { &self, req: FetchReadStateRequest, ) -> Result { - self.check_cluster_version(req.cluster_version)?; let cmd = req.cmd()?; let state = self.curp.handle_fetch_read_state(Arc::new(cmd)); Ok(FetchReadStateResponse::new(state)) @@ -583,7 +580,6 @@ impl, RC: RoleChange> CurpNode { &self, req: MoveLeaderRequest, ) -> Result { - self.check_cluster_version(req.cluster_version)?; let should_send_try_become_leader_now = self.curp.handle_move_leader(req.node_id)?; if should_send_try_become_leader_now { if let Err(e) = self @@ -775,7 +771,6 @@ impl, RC: RoleChange> CurpNode { #[allow(clippy::needless_pass_by_value)] // The value should be consumed pub(super) fn new( membership_info: MembershipInfo, - cluster_info: Arc, is_leader: bool, cmd_executor: Arc, snapshot_allocator: Box, @@ -800,18 +795,15 @@ impl, RC: RoleChange> CurpNode { .collect(), )); + let peer_addrs: HashMap<_, _> = membership_info + .init_members + .clone() + .into_iter() + .map(|(id, addr)| (id, vec![addr])) + 
.collect(); let connects = - rpc::inner_connects(cluster_info.peers_addrs(), client_tls_config.as_ref()).collect(); - let member_connects = rpc::inner_connects( - membership_info - .init_members - .clone() - .into_iter() - .map(|(id, addr)| (id, vec![addr])) - .collect(), - client_tls_config.as_ref(), - ) - .collect(); + rpc::inner_connects(peer_addrs.clone(), client_tls_config.as_ref()).collect(); + let member_connects = rpc::inner_connects(peer_addrs, client_tls_config.as_ref()).collect(); let cmd_board = Arc::new(RwLock::new(CommandBoard::new())); let lease_manager = Arc::new(RwLock::new(LeaseManager::new())); let last_applied = cmd_executor @@ -825,7 +817,6 @@ impl, RC: RoleChange> CurpNode { let (voted_for, entries) = storage.recover()?; let curp = Arc::new( RawCurp::builder() - .cluster_info(Arc::clone(&cluster_info)) .is_leader(is_leader) .cmd_board(Arc::clone(&cmd_board)) .lease_manager(Arc::clone(&lease_manager)) @@ -1076,19 +1067,6 @@ impl, RC: RoleChange> CurpNode { .is_err()) } - /// Check cluster version and return new cluster - fn check_cluster_version(&self, client_cluster_version: u64) -> Result<(), CurpError> { - let server_cluster_version = self.curp.cluster().cluster_version(); - if client_cluster_version != server_cluster_version { - debug!( - "client cluster version({}) and server cluster version({}) not match", - client_cluster_version, server_cluster_version - ); - return Err(CurpError::wrong_cluster_version()); - } - Ok(()) - } - /// Get `RawCurp` pub(super) fn raw_curp(&self) -> Arc> { Arc::clone(&self.curp) @@ -1229,7 +1207,7 @@ mod tests { .expect_append_entries() .times(1..) 
.returning(|_, _| Ok(tonic::Response::new(AppendEntriesResponse::new_accept(0)))); - let s1_id = curp.cluster().get_id_by_name("S1").unwrap(); + let s1_id = curp.get_id_by_name("S1").unwrap(); mock_connect1.expect_id().return_const(s1_id); let remove_event = Arc::new(Event::new()); task_manager.spawn(TaskName::SyncFollower, |n| { @@ -1256,7 +1234,7 @@ mod tests { Arc::clone(&task_manager), )) }; - let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); + let s2_id = curp.get_id_by_name("S2").unwrap(); curp.handle_append_entries(2, s2_id, 0, 0, vec![], 0, |_, _, _| {}) .unwrap(); @@ -1266,7 +1244,7 @@ mod tests { VoteResponse::new_accept::(req.term, vec![]).unwrap(), )) }); - let s1_id = curp.cluster().get_id_by_name("S1").unwrap(); + let s1_id = curp.get_id_by_name("S1").unwrap(); mock_connect1.expect_id().return_const(s1_id); curp.set_connect( s1_id, @@ -1279,7 +1257,7 @@ mod tests { VoteResponse::new_accept::(req.term, vec![]).unwrap(), )) }); - let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); + let s2_id = curp.get_id_by_name("S2").unwrap(); mock_connect2.expect_id().return_const(s2_id); curp.set_connect( s2_id, diff --git a/crates/curp/src/server/metrics.rs b/crates/curp/src/server/metrics.rs index e0a9e31c1..16836636c 100644 --- a/crates/curp/src/server/metrics.rs +++ b/crates/curp/src/server/metrics.rs @@ -112,8 +112,8 @@ impl Metrics { observer.observe_u64(&has_leader, leader_id.map_or(0, |_| 1), &[]); observer.observe_u64(&is_leader, u64::from(leader), &[]); - let learner = curp.cluster().self_member().is_learner(); - let id = curp.cluster().self_id(); + let learner = curp.is_learner(); + let id = curp.id(); observer.observe_u64(&is_learner, u64::from(learner), &[]); observer.observe_u64(&server_id, id, &[]); diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index faae437f4..c5d40eaf5 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -21,7 +21,6 @@ pub use self::raw_curp::RawCurp; use 
crate::cmd::Command; use crate::cmd::CommandExecutor; use crate::member::MembershipInfo; -use crate::members::ClusterInfo; use crate::members::ServerId; use crate::response::ResponseSender; use crate::role_change::RoleChange; @@ -352,7 +351,6 @@ impl, RC: RoleChange> Rpc { #[allow(clippy::too_many_arguments)] // TODO: refactor this use builder pattern pub fn new( membership_info: MembershipInfo, - cluster_info: Arc, is_leader: bool, executor: Arc, snapshot_allocator: Box, @@ -367,7 +365,6 @@ impl, RC: RoleChange> Rpc { #[allow(clippy::panic)] let curp_node = match CurpNode::new( membership_info, - cluster_info, is_leader, executor, snapshot_allocator, diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 1c563ec28..e8ac0e1cc 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -68,7 +68,6 @@ use crate::log_entry::LogEntry; use crate::member::Membership; use crate::member::MembershipInfo; use crate::member::NodeMembershipState; -use crate::members::ClusterInfo; use crate::members::ServerId; use crate::quorum::QuorumSet; use crate::response::ResponseSender; @@ -127,8 +126,6 @@ pub(super) struct RawCurpArgs { membership_info: MembershipInfo, /// Member connects member_connects: BTreeMap, - /// Cluster information - cluster_info: Arc, /// Current node is leader or not is_leader: bool, /// Cmd board for tracking the cmd sync results @@ -187,7 +184,6 @@ impl RawCurpBuilder { let log = RwLock::new(Log::new(args.cfg.batch_max_size, args.cfg.log_entries_cap)); let ctx = Context::builder() - .cluster_info(args.cluster_info) .cb(args.cmd_board) .lm(args.lease_manager) .cfg(args.cfg) @@ -331,8 +327,6 @@ enum Role { #[derive(Builder)] #[builder(build_fn(skip))] struct Context { - /// Cluster information - cluster_info: Arc, /// Config cfg: Arc, /// Client tls config @@ -384,10 +378,6 @@ impl ContextBuilder { /// Build the context from the builder pub(super) fn build(&mut self) -> Result, 
ContextBuilderError> { Ok(Context { - cluster_info: match self.cluster_info.take() { - Some(value) => value, - None => return Err(ContextBuilderError::UninitializedField("cluster_info")), - }, cfg: match self.cfg.take() { Some(value) => value, None => return Err(ContextBuilderError::UninitializedField("cfg")), @@ -454,7 +444,6 @@ impl ContextBuilder { impl Debug for Context { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Context") - .field("cluster_info", &self.cluster_info) .field("cfg", &self.cfg) .field("cb", &self.cb) .field("leader_tx", &self.leader_tx) @@ -928,7 +917,7 @@ impl RawCurp { let st_r = self.st.read(); let log_r = self.log.read(); - let contains_candidate = self.cluster().contains(candidate_id); + let contains_candidate = self.ms.map_read(|ms| ms.check_membership(candidate_id)); // extra check to shutdown removed node if !contains_candidate { debug!( @@ -1020,7 +1009,12 @@ impl RawCurp { self.become_leader(&mut st_w); // update next_index for each follower - for other in self.ctx.cluster_info.peers_ids() { + let peers = ms_r + .cluster() + .effective() + .members() + .filter_map(|(id, _)| (id != ms_r.node_id()).then_some(id)); + for other in peers { self.lst.update_next_index(other, last_log_index + 1); // iter from the end to front is more likely to match the follower } if prev_last_log_index < last_log_index { @@ -1146,11 +1140,7 @@ impl RawCurp { if st_r.role != Role::Leader { return Err(CurpError::redirect(st_r.leader_id, st_r.term)); } - if !self - .cluster() - .get(&target_id) - .is_some_and(|m| !m.is_learner) - { + if !self.ms.map_read(|ms| ms.check_membership(target_id)) { return Err(CurpError::LeaderTransfer( "target node does not exist or it is a learner".to_owned(), )); @@ -1186,7 +1176,7 @@ impl RawCurp { if st_w.role == Role::Leader { return None; } - if self.cluster().self_member().is_learner() { + if !self.ms.read().is_member() { return None; } let mut cst_l = self.cst.lock(); @@ -1216,14 
+1206,22 @@ impl RawCurp { self.log.read().commit_index } - /// Get cluster info - pub(super) fn cluster(&self) -> &ClusterInfo { - self.ctx.cluster_info.as_ref() + #[allow(clippy::unused_self)] + #[cfg(test)] + /// Get cluster id by its name + pub(super) fn get_id_by_name(&self, _name: impl AsRef) -> Option { + // FIXME: implement logic + None } /// Get self's id pub(super) fn id(&self) -> ServerId { - self.ctx.cluster_info.self_id() + self.ms.read().node_id() + } + + /// Returns `true` if the current node is a learner + pub(super) fn is_learner(&self) -> bool { + !self.ms.read().is_member() } /// Get self's node id diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 154595d1c..08c388cb9 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -38,19 +38,15 @@ impl RawCurp { role_change: TestRoleChange, task_manager: Arc, ) -> Self { - let all_members: HashMap<_, _> = (0..n) - .map(|i| (format!("S{i}"), vec![format!("S{i}")])) - .collect(); - let cluster_info = Arc::new(ClusterInfo::from_members_map(all_members, [], "S0")); + let peer_ids: Vec<_> = (1..n).collect(); let cmd_board = Arc::new(RwLock::new(CommandBoard::new())); let lease_manager = Arc::new(RwLock::new(LeaseManager::new())); - let sync_events = cluster_info - .peers_ids() + let sync_events = peer_ids + .clone() .into_iter() .map(|id| (id, Arc::new(Event::new()))) .collect(); - let connects = cluster_info - .peers_ids() + let connects = peer_ids .into_iter() .map(|id| { ( @@ -81,7 +77,6 @@ impl RawCurp { let id_barrier = Arc::new(IdBarrier::new()); Self::builder() - .cluster_info(cluster_info) .is_leader(true) .cmd_board(cmd_board) .lease_manager(lease_manager) @@ -230,7 +225,7 @@ fn heartbeat_will_calibrate_term() { let task_manager = Arc::new(TaskManager::new()); let curp = { RawCurp::new_test(3, mock_role_change(), task_manager) }; - let s1_id = curp.cluster().get_id_by_name("S1").unwrap(); + let s1_id
= curp.get_id_by_name("S1").unwrap(); let result = curp.handle_append_entries_resp(s1_id, None, 2, false, 1); assert!(result.is_err()); @@ -245,7 +240,7 @@ fn heartbeat_will_calibrate_next_index() { let task_manager = Arc::new(TaskManager::new()); let curp = RawCurp::new_test(3, mock_role_change(), task_manager); - let s1_id = curp.cluster().get_id_by_name("S1").unwrap(); + let s1_id = curp.get_id_by_name("S1").unwrap(); let result = curp.handle_append_entries_resp(s1_id, None, 0, false, 1); assert_eq!(result, Ok(false)); @@ -260,7 +255,7 @@ fn handle_ae_will_calibrate_term() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; curp.update_to_term_and_become_follower(&mut *curp.st.write(), 1); - let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); + let s2_id = curp.get_id_by_name("S2").unwrap(); let result = curp.handle_append_entries(2, s2_id, 0, 0, vec![], 0, |_, _, _| {}); assert!(result.is_ok()); @@ -278,7 +273,7 @@ fn handle_ae_will_set_leader_id() { let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; curp.update_to_term_and_become_follower(&mut *curp.st.write(), 1); - let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); + let s2_id = curp.get_id_by_name("S2").unwrap(); let result = curp.handle_append_entries(1, s2_id, 0, 0, vec![], 0, |_, _, _| {}); assert!(result.is_ok()); @@ -295,7 +290,7 @@ fn handle_ae_will_reject_wrong_term() { let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; curp.update_to_term_and_become_follower(&mut *curp.st.write(), 1); - let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); + let s2_id = curp.get_id_by_name("S2").unwrap(); let result = curp.handle_append_entries(0, s2_id, 0, 0, vec![], 0, |_, _, _| {}); assert!(result.is_err()); assert_eq!(result.unwrap_err().0, 1); @@ -308,7 +303,7 @@ fn handle_ae_will_reject_wrong_log() { let curp = { Arc::new(RawCurp::new_test(3, 
mock_role_change(), task_manager)) }; curp.update_to_term_and_become_follower(&mut *curp.st.write(), 1); - let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); + let s2_id = curp.get_id_by_name("S2").unwrap(); let result = curp.handle_append_entries( 1, s2_id, @@ -400,7 +395,7 @@ fn handle_vote_will_calibrate_term() { let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; curp.st.write().leader_id = None; - let s1_id = curp.cluster().get_id_by_name("S1").unwrap(); + let s1_id = curp.get_id_by_name("S1").unwrap(); let result = curp.handle_vote(2, s1_id, 0, 0).unwrap(); assert_eq!(result.0, 2); @@ -415,7 +410,7 @@ fn handle_vote_will_reject_smaller_term() { let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; curp.update_to_term_and_become_follower(&mut *curp.st.write(), 2); - let s1_id = curp.cluster().get_id_by_name("S1").unwrap(); + let s1_id = curp.get_id_by_name("S1").unwrap(); let result = curp.handle_vote(1, s1_id, 0, 0); assert_eq!(result.unwrap_err(), Some(2)); } @@ -425,7 +420,7 @@ fn handle_vote_will_reject_smaller_term() { fn handle_vote_will_reject_outdated_candidate() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; - let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); + let s2_id = curp.get_id_by_name("S2").unwrap(); let result = curp.handle_append_entries( 2, s2_id, @@ -442,7 +437,7 @@ fn handle_vote_will_reject_outdated_candidate() { ); assert!(result.is_ok()); curp.st.write().leader_id = None; - let s1_id = curp.cluster().get_id_by_name("S1").unwrap(); + let s1_id = curp.get_id_by_name("S1").unwrap(); let result = curp.handle_vote(3, s1_id, 0, 0); assert_eq!(result.unwrap_err(), Some(3)); } @@ -459,12 +454,12 @@ fn pre_candidate_will_become_candidate_then_become_leader_after_election_succeed let _ig = curp.tick_election(); } - let s1_id = curp.cluster().get_id_by_name("S1").unwrap(); + let s1_id = 
curp.get_id_by_name("S1").unwrap(); let result = curp.handle_pre_vote_resp(s1_id, 2, true).unwrap(); assert!(result.is_some()); assert_eq!(curp.role(), Role::Candidate); - let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); + let s2_id = curp.get_id_by_name("S2").unwrap(); let result = curp.handle_pre_vote_resp(s2_id, 2, true); assert!(result.is_err()); assert_eq!(curp.role(), Role::Candidate); @@ -490,7 +485,7 @@ fn vote_will_calibrate_pre_candidate_term() { let _ig = curp.tick_election(); } - let s1_id = curp.cluster().get_id_by_name("S1").unwrap(); + let s1_id = curp.get_id_by_name("S1").unwrap(); let result = curp.handle_vote_resp(s1_id, 3, false, vec![]); assert!(result.is_err()); @@ -517,11 +512,11 @@ fn recover_from_spec_pools_will_pick_the_correct_cmds() { curp.push_cmd(ProposeId(TEST_CLIENT_ID, 0), Arc::clone(&cmd0)); curp.log.map_write(|mut log_w| log_w.commit_index = 1); - let s0_id = curp.cluster().get_id_by_name("S0").unwrap(); - let s1_id = curp.cluster().get_id_by_name("S1").unwrap(); - let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); - let s3_id = curp.cluster().get_id_by_name("S3").unwrap(); - let s4_id = curp.cluster().get_id_by_name("S4").unwrap(); + let s0_id = curp.get_id_by_name("S0").unwrap(); + let s1_id = curp.get_id_by_name("S1").unwrap(); + let s2_id = curp.get_id_by_name("S2").unwrap(); + let s3_id = curp.get_id_by_name("S3").unwrap(); + let s4_id = curp.get_id_by_name("S4").unwrap(); let spec_pools = BTreeMap::from([ ( @@ -672,8 +667,8 @@ fn is_synced_should_return_true_when_followers_caught_up_with_leader() { let task_manager = Arc::new(TaskManager::new()); let curp = { RawCurp::new_test(3, mock_role_change(), task_manager) }; - let s1_id = curp.cluster().get_id_by_name("S1").unwrap(); - let s2_id = curp.cluster().get_id_by_name("S2").unwrap(); + let s1_id = curp.get_id_by_name("S1").unwrap(); + let s2_id = curp.get_id_by_name("S2").unwrap(); curp.log.write().commit_index = 3; assert!(!curp.is_synced(s1_id)); 
assert!(!curp.is_synced(s2_id)); @@ -740,7 +735,7 @@ fn add_learner_node_and_promote_should_success() { fn add_exists_node_should_return_node_already_exists_error() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; - let exists_node_id = curp.cluster().get_id_by_name("S1").unwrap(); + let exists_node_id = curp.get_id_by_name("S1").unwrap(); let changes = vec![ConfChange::add( exists_node_id, vec!["http://127.0.0.1:4567".to_owned()], @@ -757,7 +752,7 @@ fn remove_node_should_remove_node_from_curp() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; let old_cluster = curp.cluster().clone(); - let follower_id = curp.cluster().get_id_by_name("S1").unwrap(); + let follower_id = curp.get_id_by_name("S1").unwrap(); let changes = vec![ConfChange::remove(follower_id)]; assert!(curp.check_new_config(&changes).is_ok()); let infos = curp.apply_conf_change(changes.clone()).unwrap(); @@ -794,7 +789,7 @@ fn update_node_should_update_the_address_of_node() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let old_cluster = curp.cluster().clone(); - let follower_id = curp.cluster().get_id_by_name("S1").unwrap(); + let follower_id = curp.get_id_by_name("S1").unwrap(); let mut mock_connect = MockInnerConnectApi::new(); mock_connect.expect_update_addrs().returning(|_| Ok(())); curp.set_connect( @@ -835,7 +830,7 @@ fn update_node_should_update_the_address_of_node() { fn leader_handle_propose_conf_change() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; - let follower_id = curp.cluster().get_id_by_name("S1").unwrap(); + let follower_id = curp.get_id_by_name("S1").unwrap(); assert_eq!( curp.cluster().peer_urls(follower_id), Some(vec!["S1".to_owned()]) @@ -856,7 
+851,7 @@ fn follower_handle_propose_conf_change() { let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; curp.update_to_term_and_become_follower(&mut *curp.st.write(), 2); - let follower_id = curp.cluster().get_id_by_name("S1").unwrap(); + let follower_id = curp.get_id_by_name("S1").unwrap(); assert_eq!( curp.cluster().peer_urls(follower_id), Some(vec!["S1".to_owned()]) @@ -889,7 +884,7 @@ fn leader_handle_move_leader() { let res = curp.handle_move_leader(12345); assert!(res.is_err()); - let target_id = curp.cluster().get_id_by_name("S1").unwrap(); + let target_id = curp.get_id_by_name("S1").unwrap(); let res = curp.handle_move_leader(target_id); // need to send try become leader now after handle_move_leader assert!(res.is_ok_and(|b| b)); @@ -906,7 +901,7 @@ fn follower_handle_move_leader() { let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; curp.update_to_term_and_become_follower(&mut *curp.st.write(), 2); - let target_id = curp.cluster().get_id_by_name("S1").unwrap(); + let target_id = curp.get_id_by_name("S1").unwrap(); let res = curp.handle_move_leader(target_id); assert!(matches!(res, Err(CurpError::Redirect(_)))); } @@ -918,7 +913,7 @@ fn leader_will_reset_transferee_after_remove_node() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; - let target_id = curp.cluster().get_id_by_name("S1").unwrap(); + let target_id = curp.get_id_by_name("S1").unwrap(); let res = curp.handle_move_leader(target_id); assert!(res.is_ok_and(|b| b)); assert_eq!(curp.get_transferee(), Some(target_id)); @@ -935,7 +930,7 @@ fn leader_will_reject_propose_when_transferring() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; - let target_id = curp.cluster().get_id_by_name("S1").unwrap(); + let target_id = curp.get_id_by_name("S1").unwrap(); let res = 
curp.handle_move_leader(target_id); assert!(res.is_ok_and(|b| b)); @@ -951,7 +946,7 @@ fn leader_will_reset_transferee_after_it_become_follower() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; - let target_id = curp.cluster().get_id_by_name("S1").unwrap(); + let target_id = curp.get_id_by_name("S1").unwrap(); let res = curp.handle_move_leader(target_id); assert!(res.is_ok_and(|b| b)); assert_eq!(curp.get_transferee(), Some(target_id)); diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index b904f2691..2eeee176a 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -153,7 +153,6 @@ impl CurpGroup { let curp_storage = Arc::new(DB::open(&config.engine_cfg).unwrap()); let server = Arc::new(Rpc::new( membership_info, - cluster_info, name == leader_name, ce, snapshot_allocator, @@ -294,7 +293,6 @@ impl CurpGroup { let membership_info = MembershipInfo::new(node_id as u64, init_members); let server = Arc::new(Rpc::new( membership_info, - cluster_info, false, ce, snapshot_allocator, diff --git a/crates/xline/src/server/lease_server.rs b/crates/xline/src/server/lease_server.rs index 1dca749f7..1fab5aad2 100644 --- a/crates/xline/src/server/lease_server.rs +++ b/crates/xline/src/server/lease_server.rs @@ -2,7 +2,6 @@ use std::{pin::Pin, sync::Arc, time::Duration}; use async_stream::{stream, try_stream}; use clippy_utilities::NumericCast; -use curp::members::ClusterInfo; use futures::stream::Stream; use tokio::time; #[cfg(not(madsim))] @@ -44,8 +43,6 @@ pub(crate) struct LeaseServer { client: Arc, /// Id generator id_gen: Arc, - /// cluster information - cluster_info: Arc, /// Client tls config client_tls_config: Option, /// Task manager @@ -63,7 +60,6 @@ impl LeaseServer { auth_storage: Arc, client: Arc, id_gen: Arc, - cluster_info: Arc, client_tls_config: Option, task_manager: &Arc, ) -> Arc 
{ @@ -72,7 +68,6 @@ impl LeaseServer { auth_storage, client, id_gen, - cluster_info, client_tls_config, task_manager: Arc::clone(task_manager), }); @@ -307,17 +302,13 @@ impl Lease for LeaseServer { if self.lease_storage.is_primary() { break self.leader_keep_alive(request_stream)?; } - let leader_id = self.client.fetch_leader_id(false).await?; + let _leader_id = self.client.fetch_leader_id(false).await?; // Given that a candidate server may become a leader when it won the election or // a follower when it lost the election. Therefore we need to double check here. // We can directly invoke leader_keep_alive when a candidate becomes a leader. if !self.lease_storage.is_primary() { - let leader_addrs = self.cluster_info.client_urls(leader_id).unwrap_or_else(|| { - unreachable!( - "The address of leader {} not found in all_members {:?}", - leader_id, self.cluster_info - ) - }); + // FIXME: get leader address + let leader_addrs = vec![]; break self .follower_keep_alive(request_stream, &leader_addrs) .await?; @@ -355,13 +346,8 @@ impl Lease for LeaseServer { }; return Ok(tonic::Response::new(res)); } - let leader_id = self.client.fetch_leader_id(false).await?; - let leader_addrs = self.cluster_info.client_urls(leader_id).unwrap_or_else(|| { - unreachable!( - "The address of leader {} not found in all_members {:?}", - leader_id, self.cluster_info - ) - }); + let _leader_id = self.client.fetch_leader_id(false).await?; + let leader_addrs = vec![]; // FIXME: get leader address if !self.lease_storage.is_primary() { let endpoints = build_endpoints(&leader_addrs, self.client_tls_config.as_ref())?; let channel = tonic::transport::Channel::balance_list(endpoints.into_iter()); diff --git a/crates/xline/src/server/maintenance.rs b/crates/xline/src/server/maintenance.rs index 9ecf80209..9fb9c4ce7 100644 --- a/crates/xline/src/server/maintenance.rs +++ b/crates/xline/src/server/maintenance.rs @@ -3,7 +3,7 @@ use std::{fmt::Debug, pin::Pin, sync::Arc}; use async_stream::try_stream; use 
bytes::BytesMut; use clippy_utilities::{NumericCast, OverflowArithmetic}; -use curp::{cmd::CommandExecutor as _, members::ClusterInfo, server::RawCurp}; +use curp::{cmd::CommandExecutor as _, server::RawCurp}; use engine::SnapshotApi; use futures::stream::Stream; use sha2::{Digest, Sha256}; @@ -43,8 +43,6 @@ pub(crate) struct MaintenanceServer { header_gen: Arc, /// Consensus client client: Arc, - /// cluster information - cluster_info: Arc, /// Raw curp raw_curp: Arc>>>, /// Command executor @@ -62,7 +60,6 @@ impl MaintenanceServer { client: Arc, db: Arc, header_gen: Arc, - cluster_info: Arc, raw_curp: Arc>>>, ce: Arc, alarm_store: Arc, @@ -73,7 +70,6 @@ impl MaintenanceServer { db, header_gen, client, - cluster_info, raw_curp, ce, alarm_store, @@ -118,7 +114,8 @@ impl Maintenance for MaintenanceServer { &self, _request: tonic::Request, ) -> Result, tonic::Status> { - let is_learner = self.cluster_info.self_member().is_learner; + // FIXME: get learner status + let is_learner = false; let (leader, term, _) = self.raw_curp.leader(); let commit_index = self.raw_curp.commit_index(); let size = self.db.file_size().map_err(|e| { diff --git a/crates/xline/src/server/xline_server.rs b/crates/xline/src/server/xline_server.rs index ff22732fe..415869397 100644 --- a/crates/xline/src/server/xline_server.rs +++ b/crates/xline/src/server/xline_server.rs @@ -4,10 +4,9 @@ use anyhow::{anyhow, Result}; use clippy_utilities::{NumericCast, OverflowArithmetic}; use curp::{ client::ClientBuilder as CurpClientBuilder, - member::MembershipInfo, - members::ClusterInfo, + member::{ClusterId, MembershipInfo}, rpc::{InnerProtocolServer, ProtocolServer}, - server::{Rpc, StorageApi as _, DB as CurpDB}, + server::{Rpc, DB as CurpDB}, }; use dashmap::DashMap; use engine::{MemorySnapshotAllocator, RocksSnapshotAllocator, SnapshotAllocator}; @@ -22,13 +21,10 @@ use tonic::transport::{ server::Connected, Certificate, ClientTlsConfig, Identity, ServerTlsConfig, }; use 
tonic::transport::{server::Router, Server}; -use tracing::{info, warn}; +use tracing::info; use utils::{ barrier::IdBarrier, - config::{ - AuthConfig, ClusterConfig, CompactConfig, EngineConfig, InitialClusterState, StorageConfig, - TlsConfig, - }, + config::{AuthConfig, ClusterConfig, CompactConfig, EngineConfig, StorageConfig, TlsConfig}, task_manager::{tasks::TaskName, TaskManager}, }; #[cfg(madsim)] @@ -76,8 +72,6 @@ pub(crate) type CurpServer = Rpc pub struct XlineServer { /// Membership information membership_info: MembershipInfo, - /// Cluster information - cluster_info: Arc, /// Cluster Config cluster_config: ClusterConfig, /// Storage config, @@ -116,18 +110,12 @@ impl XlineServer { #[cfg(madsim)] let (client_tls_config, server_tls_config) = (None, None); let curp_storage = Arc::new(CurpDB::open(&cluster_config.curp_config().engine_cfg)?); - let cluster_info = Arc::new(Self::init_cluster_info( - &cluster_config, - curp_storage.as_ref(), - client_tls_config.as_ref(), - )?); let membership_info = MembershipInfo::new( *cluster_config.node_id(), cluster_config.initial_membership_info().clone(), ); Ok(Self { - cluster_info, cluster_config, storage_config, compact_config, @@ -140,44 +128,6 @@ impl XlineServer { }) } - #[allow(clippy::todo)] - /// Init cluster info from cluster config - fn init_cluster_info( - cluster_config: &ClusterConfig, - curp_storage: &CurpDB, - _tls_config: Option<&ClientTlsConfig>, - ) -> Result { - info!("name = {:?}", cluster_config.name()); - info!("cluster_peers = {:?}", cluster_config.peers()); - - let name = cluster_config.name().clone(); - let all_members = cluster_config.peers().clone(); - let self_client_urls = cluster_config.client_advertise_urls().clone(); - match ( - curp_storage.recover_cluster_info()?, - *cluster_config.initial_cluster_state(), - ) { - (Some(cluster_info), _) => { - info!("get cluster_info from local"); - Ok(cluster_info) - } - (None, InitialClusterState::New) => { - info!("get cluster_info by args"); - let 
cluster_info = - ClusterInfo::from_members_map(all_members, self_client_urls, &name); - curp_storage.put_cluster_info(&cluster_info)?; - Ok(cluster_info) - } - (None, InitialClusterState::Existing) => { - // FIXME - todo!("adding a new member to the cluster"); - } - (None, _) => { - unreachable!("xline only supports two initial cluster states: new, existing") - } - } - } - /// Construct a `LeaseCollection` #[inline] #[allow(clippy::arithmetic_side_effects)] // never overflow @@ -268,9 +218,11 @@ impl XlineServer { /// Construct a header generator #[inline] - fn construct_generator(cluster_info: &ClusterInfo) -> (Arc, Arc) { - let member_id = cluster_info.self_id(); - let cluster_id = cluster_info.cluster_id(); + fn construct_generator( + membership_info: &MembershipInfo, + ) -> (Arc, Arc) { + let member_id = membership_info.node_id; + let cluster_id = membership_info.cluster_id(); ( Arc::new(HeaderGenerator::new(cluster_id, member_id)), Arc::new(IdGenerator::new(member_id)), @@ -374,7 +326,7 @@ impl XlineServer { { let db = DB::open(&self.storage_config.engine)?; let key_pair = Self::read_key_pair(&self.auth_config).await?; - let (xline_router, curp_router, curp_client) = self.init_router(db, key_pair).await?; + let (xline_router, curp_router, _curp_client) = self.init_router(db, key_pair).await?; self.task_manager .spawn(TaskName::TonicServer, |n1| async move { let n2 = n1.clone(); @@ -383,9 +335,7 @@ impl XlineServer { _ = curp_router.serve_with_incoming_shutdown(curp_incoming, n2.wait()) => {}, } }); - if let Err(e) = self.publish(curp_client).await { - warn!("publish name to cluster failed: {e:?}"); - }; + Ok(()) } @@ -447,7 +397,7 @@ impl XlineServer { AuthWrapper, Arc, )> { - let (header_gen, id_gen) = Self::construct_generator(&self.cluster_info); + let (header_gen, id_gen) = Self::construct_generator(&self.membership_info); let lease_collection = Self::construct_lease_collection( self.cluster_config.curp_config().heartbeat_interval, 
self.cluster_config.curp_config().candidate_timeout_ticks, @@ -504,7 +454,6 @@ impl XlineServer { let curp_server = CurpServer::new( self.membership_info.clone(), - Arc::clone(&self.cluster_info), *self.cluster_config.is_leader(), Arc::clone(&ce), snapshot_allocator, @@ -520,9 +469,14 @@ impl XlineServer { let client = Arc::new( CurpClientBuilder::new(*self.cluster_config.client_config(), true) .tls_config(self.client_tls_config.clone()) - .cluster_version(self.cluster_info.cluster_version()) - .init_nodes(self.cluster_info.all_members_peer_urls().values().cloned()) - .bypass(self.cluster_info.self_id(), curp_server.clone()) + .init_nodes( + self.membership_info + .init_members + .values() + .cloned() + .map(|addr| vec![addr]), + ) + .bypass(self.membership_info.node_id, curp_server.clone()) .build::()?, ) as Arc; @@ -530,7 +484,7 @@ impl XlineServer { compactor.set_compactable(Arc::clone(&client)).await; } ce.set_alarmer(Alarmer::new( - self.cluster_info.self_id(), + self.membership_info.node_id, Arc::clone(&client), )); let raw_curp = curp_server.raw_curp(); @@ -538,6 +492,13 @@ impl XlineServer { Metrics::register_callback()?; let server_timeout = self.cluster_config.server_timeout(); + let self_addrs: Vec<_> = self + .membership_info + .init_members + .get(&self.membership_info.node_id) + .cloned() + .into_iter() + .collect(); Ok(( KvServer::new( Arc::clone(&kv_storage), @@ -550,7 +511,7 @@ impl XlineServer { Arc::clone(&client), Arc::clone(&auth_storage), Arc::clone(&id_gen), - &self.cluster_info.self_peer_urls(), + &self_addrs, self.client_tls_config.as_ref(), ), LeaseServer::new( @@ -558,7 +519,6 @@ impl XlineServer { Arc::clone(&auth_storage), Arc::clone(&client), id_gen, - Arc::clone(&self.cluster_info), self.client_tls_config.clone(), &self.task_manager, ), @@ -575,7 +535,6 @@ impl XlineServer { Arc::clone(&client), db, Arc::clone(&header_gen), - Arc::clone(&self.cluster_info), raw_curp, ce, alarm_storage, @@ -587,17 +546,6 @@ impl XlineServer { )) } - 
/// Publish the name of current node to cluster - async fn publish(&self, curp_client: Arc) -> Result<(), tonic::Status> { - curp_client - .propose_publish( - self.cluster_info.self_id(), - self.cluster_info.self_name(), - self.cluster_info.self_client_urls(), - ) - .await - } - /// Stop `XlineServer` #[inline] pub async fn stop(&self) { From 326930f707f35b6e5d70c6314a3c0f34ab83e2d3 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 12 Sep 2024 17:40:21 +0800 Subject: [PATCH 168/322] feat: implement membership persistent Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 4 +- crates/curp/src/server/storage/db.rs | 113 ++++++-------------------- crates/curp/src/server/storage/mod.rs | 47 ++++------- 3 files changed, 44 insertions(+), 120 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index aa89141f9..70951bf0f 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -171,8 +171,8 @@ impl NodeMembershipState { } /// Membership state stored in current node -#[derive(Debug, Default)] -pub(crate) struct MembershipState { +#[derive(Serialize, Deserialize, Debug, Default)] +pub struct MembershipState { /// Config that exist in log, but haven't committed effective: Membership, /// Index of the effective membership diff --git a/crates/curp/src/server/storage/db.rs b/crates/curp/src/server/storage/db.rs index 6d8963508..fddee997a 100644 --- a/crates/curp/src/server/storage/db.rs +++ b/crates/curp/src/server/storage/db.rs @@ -1,28 +1,17 @@ use std::ops::Deref; -use engine::{Engine, EngineType, StorageEngine, StorageOps, WriteOperation}; +use engine::{Engine, EngineType, StorageOps, WriteOperation}; use parking_lot::Mutex; -use prost::Message; use utils::config::EngineConfig; use super::{ wal::{codec::DataFrame, config::WALConfig, WALStorage, WALStorageOps}, RecoverData, StorageApi, StorageError, }; -use crate::{ - cmd::Command, - 
log_entry::LogEntry, - members::{ClusterInfo, ServerId}, - rpc::Member, -}; +use crate::{cmd::Command, log_entry::LogEntry, member::MembershipState, members::ServerId}; /// Key for persisted state const VOTE_FOR: &[u8] = b"VoteFor"; -/// Key for cluster id -const CLUSTER_ID: &[u8] = b"ClusterId"; -/// Key for member id -const MEMBER_ID: &[u8] = b"MemberId"; - /// Column family name for curp storage const CF: &str = "curp"; /// Column family name for members @@ -34,6 +23,9 @@ const ROCKSDB_SUB_DIR: &str = "rocksdb"; /// The sub dir for WAL files const WAL_SUB_DIR: &str = "wal"; +/// Keys for membership persistent +const MEMBERSHIP: &[u8] = b"membership"; + /// `DB` storage implementation #[derive(Debug)] pub struct DB { @@ -70,81 +62,6 @@ impl StorageApi for DB { .map_err(Into::into) } - #[inline] - fn put_member(&self, member: &Member) -> Result<(), StorageError> { - let id = member.id; - let data = member.encode_to_vec(); - let op = WriteOperation::new_put(MEMBERS_CF, id.to_le_bytes().to_vec(), data); - self.db.write_multi(vec![op], true)?; - Ok(()) - } - - #[inline] - fn remove_member(&self, id: ServerId) -> Result<(), StorageError> { - let id_bytes = id.to_le_bytes(); - let op = WriteOperation::new_delete(MEMBERS_CF, &id_bytes); - self.db.write_multi(vec![op], true)?; - Ok(()) - } - - #[inline] - fn put_cluster_info(&self, cluster_info: &ClusterInfo) -> Result<(), StorageError> { - let mut ops = Vec::new(); - ops.push(WriteOperation::new_put( - CF, - CLUSTER_ID.to_vec(), - cluster_info.cluster_id().to_le_bytes().to_vec(), - )); - ops.push(WriteOperation::new_put( - CF, - MEMBER_ID.to_vec(), - cluster_info.self_id().to_le_bytes().to_vec(), - )); - for m in cluster_info.all_members_vec() { - ops.push(WriteOperation::new_put( - MEMBERS_CF, - m.id.to_le_bytes().to_vec(), - m.encode_to_vec(), - )); - } - self.db.write_multi(ops, true)?; - Ok(()) - } - - #[inline] - fn recover_cluster_info(&self) -> Result, StorageError> { - let cluster_id = self.db.get(CF, 
CLUSTER_ID)?.map(|bytes| { - u64::from_le_bytes( - bytes - .as_slice() - .try_into() - .unwrap_or_else(|e| unreachable!("cannot decode index from backend, {e:?}")), - ) - }); - let member_id = self.db.get(CF, MEMBER_ID)?.map(|bytes| { - u64::from_le_bytes( - bytes - .as_slice() - .try_into() - .unwrap_or_else(|e| unreachable!("cannot decode index from backend, {e:?}")), - ) - }); - let mut members = vec![]; - for (_k, v) in self.db.get_all(MEMBERS_CF)? { - let member = Member::decode(v.as_ref())?; - members.push(member); - } - - let cluster_info = match (cluster_id, member_id, members.is_empty()) { - (Some(cluster_id), Some(member_id), false) => { - Some(ClusterInfo::new(cluster_id, member_id, members)) - } - _ => None, - }; - - Ok(cluster_info) - } - #[inline] fn recover(&self) -> Result, StorageError> { let entries = self.wal.lock().recover()?; @@ -155,6 +72,26 @@ impl StorageApi for DB { .transpose()?; Ok((voted_for, entries)) } + + #[inline] + fn put_membership( + &self, + node_id: u64, + membership: &MembershipState, + ) -> Result<(), StorageError> { + let data = bincode::serialize(&(node_id, membership))?; + let op = WriteOperation::new_put(CF, MEMBERSHIP.to_vec(), data); + self.db.write_multi(vec![op], true).map_err(Into::into) + } + + #[inline] + fn recover_membership(&self) -> Result, StorageError> { + self.db + .get(CF, MEMBERSHIP)? 
+ .map(|bytes| bincode::deserialize::<(u64, MembershipState)>(&bytes)) + .transpose() + .map_err(Into::into) + } } impl DB { diff --git a/crates/curp/src/server/storage/mod.rs b/crates/curp/src/server/storage/mod.rs index f07ecc543..9f022acca 100644 --- a/crates/curp/src/server/storage/mod.rs +++ b/crates/curp/src/server/storage/mod.rs @@ -1,12 +1,7 @@ use engine::EngineError; use thiserror::Error; -use crate::{ - cmd::Command, - log_entry::LogEntry, - members::{ClusterInfo, ServerId}, - rpc::Member, -}; +use crate::{cmd::Command, log_entry::LogEntry, member::MembershipState, members::ServerId}; /// Storage layer error #[derive(Error, Debug)] @@ -55,30 +50,6 @@ pub trait StorageApi: Send + Sync { /// Return `StorageError` when it failed to store the `voted_for` info to underlying database. fn flush_voted_for(&self, term: u64, voted_for: ServerId) -> Result<(), StorageError>; - /// Put `Member` into storage - /// - /// # Errors - /// Return `StorageError` when it failed to store the member info to underlying database. - fn put_member(&self, member: &Member) -> Result<(), StorageError>; - - /// Remove `Member` from storage - /// - /// # Errors - /// Return `StorageError` when it failed to remove the member info from underlying database. - fn remove_member(&self, id: ServerId) -> Result<(), StorageError>; - - /// Put `ClusterInfo` into storage - /// - /// # Errors - /// Return `StorageError` when it failed to store the cluster info to underlying database. - fn put_cluster_info(&self, cluster_info: &ClusterInfo) -> Result<(), StorageError>; - - /// Recover `ClusterInfo` from storage - /// - /// # Errors - /// Return `StorageError` when it failed to recover the cluster info from underlying database. 
- fn recover_cluster_info(&self) -> Result, StorageError>; - /// Put log entries in storage /// /// # Errors @@ -91,6 +62,22 @@ pub trait StorageApi: Send + Sync { /// # Errors /// Return `StorageError` when it failed to recover the log entries and vote info from underlying database. fn recover(&self) -> Result, StorageError>; + + /// Put membership into the persisted storage + /// + /// # Errors + /// Return `StorageError` when it failed to store the membership to underlying database. + fn put_membership( + &self, + node_id: u64, + membership: &MembershipState, + ) -> Result<(), StorageError>; + + /// Recovers membership from the persisted storage + /// + /// # Errors + /// Return `StorageError` when it failed to recover the membership from underlying database. + fn recover_membership(&self) -> Result, StorageError>; } /// CURP `DB` storage implementation From cca809ad5ea8ec25258036eb962c0b90c569e31f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 12 Sep 2024 18:09:17 +0800 Subject: [PATCH 169/322] feat: persist membership in raw_curp Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/member_impl.rs | 2 +- .../curp/src/server/raw_curp/member_impl.rs | 23 +++++++++++++++---- crates/curp/src/server/raw_curp/mod.rs | 6 ++++- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index dbca7eb91..a7fdb1dc4 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -92,7 +92,7 @@ impl, RC: RoleChange> CurpNode { }); }; for config in configs { - let propose_id = self.curp.update_membership(config, spawn_sync); + let propose_id = self.curp.update_membership(config, spawn_sync)?; self.curp.wait_propose_ids(Some(propose_id)).await; } diff --git a/crates/curp/src/server/raw_curp/member_impl.rs 
b/crates/curp/src/server/raw_curp/member_impl.rs index 27c925bf0..9ffc19e92 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -15,6 +15,8 @@ use crate::member::NodeMembershipState; use crate::rpc::connect::InnerConnectApiWrapper; use crate::rpc::inner_connects; use crate::rpc::ProposeId; +use crate::server::StorageApi; +use crate::server::StorageError; use super::RawCurp; use super::Role; @@ -35,7 +37,11 @@ impl RawCurp { } /// Updates the membership config - pub(crate) fn update_membership(&self, config: Membership, spawn_sync: F) -> ProposeId + pub(crate) fn update_membership( + &self, + config: Membership, + spawn_sync: F, + ) -> Result where F: Fn(Arc, Arc, InnerConnectApiWrapper), { @@ -48,9 +54,12 @@ impl RawCurp { let new_connects = self.build_connects(&config); ms_w.cluster_mut().append(entry.index, config); let (removed, added) = ms_w.update_connects(&new_connects); + self.ctx + .curp_storage + .put_membership(ms_w.node_id(), ms_w.cluster())?; self.update_node_sync(removed, added, spawn_sync); - propose_id + Ok(propose_id) } /// Append membership entries @@ -60,7 +69,8 @@ impl RawCurp { truncate_at: LogIndex, commit_index: LogIndex, spawn_sync: F, - ) where + ) -> Result<(), StorageError> + where E: AsRef>, I: IntoIterator, F: Fn(Arc, Arc, InnerConnectApiWrapper), @@ -81,9 +91,14 @@ impl RawCurp { self.update_node_sync(removed, added, &spawn_sync); ms_w.cluster_mut().append(index, config); ms_w.cluster_mut().commit(commit_index.min(index)); + self.ctx + .curp_storage + .put_membership(ms_w.node_id(), ms_w.cluster())?; } self.update_role(&ms_w); + + Ok(()) } /// Updates the commit index @@ -118,7 +133,6 @@ impl RawCurp { } /// Updates the background task of node sync - /// TODO: member persistent fn update_node_sync( &self, removed: BTreeMap, @@ -141,7 +155,6 @@ impl RawCurp { remove_events_l.remove(&id).map(|e| e.notify(1)).is_some(), "id doesn't exist" ); - // TODO: update persistent 
membership } } } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index e8ac0e1cc..d22413d20 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -771,7 +771,11 @@ impl RawCurp { let (to_persist, truncate_at) = log_w .try_append_entries(entries.clone(), prev_log_index, prev_log_term) .map_err(|_ig| (term, log_w.commit_index + 1))?; - self.append_membership(&entries, truncate_at, leader_commit, spawn_sync); + self.append_membership(&entries, truncate_at, leader_commit, spawn_sync) + .map_err(|err| { + error!("append memebrship entires failed: {err}"); + (term, log_w.commit_index + 1) + })?; // update commit index let prev_commit_index = log_w.commit_index; log_w.commit_index = min(leader_commit, log_w.last_log_index()); From fbc791840778fc3e745e2f2ae236ea2257b34018 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 12 Sep 2024 19:52:16 +0800 Subject: [PATCH 170/322] refactor: sync follower task Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 20 +------------------- crates/curp/src/server/raw_curp/mod.rs | 12 ++++++++++++ 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 7e2e894d6..89f98d0a5 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -885,11 +885,10 @@ impl, RC: RoleChange> CurpNode { Self::election_task(Arc::clone(&curp), n) }); - let mut remove_events = HashMap::new(); curp.with_member_connects(|connects| { for c in connects.values() { let sync_event = curp.sync_event(c.id()); - let remove_event = Arc::new(Event::new()); + let remove_event = curp.remove_event(c.id()); task_manager.spawn(TaskName::SyncFollower, |n| { Self::sync_follower_task( @@ -900,25 +899,8 @@ impl, RC: RoleChange> CurpNode { n, ) }); - _ = 
remove_events.insert(c.id(), remove_event); } }); - // TODO: Remove this after new membership implementation - for c in curp.connects() { - let sync_event = curp.sync_event(c.id()); - let remove_event = Arc::new(Event::new()); - - task_manager.spawn(TaskName::SyncFollower, |n| { - Self::sync_follower_task( - Arc::clone(&curp), - c.value().clone(), - sync_event, - Arc::clone(&remove_event), - n, - ) - }); - _ = remove_events.insert(c.id(), remove_event); - } task_manager.spawn(TaskName::HandlePropose, |_n| { Self::handle_propose_task(Arc::clone(&cmd_executor), Arc::clone(&curp), propose_rx) diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index d22413d20..d6aa3688f 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1356,6 +1356,18 @@ impl RawCurp { ) } + // TODO: we could directly abort the sync task instead of signal it manually + /// Get remove event + pub(super) fn remove_event(&self, id: ServerId) -> Arc { + Arc::clone( + self.ctx + .remove_events + .lock() + .get(&id) + .unwrap_or_else(|| unreachable!("server id {id} not found")), + ) + } + /// Check if the current node is shutting down pub(super) fn is_node_shutdown(&self) -> bool { self.task_manager.is_node_shutdown() From f8670c04951483d8d6a027f5502a0b8f1dd02002 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 13 Sep 2024 09:55:10 +0800 Subject: [PATCH 171/322] refactor: removes publish rpc Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/common | 2 +- crates/curp/src/client/connect.rs | 17 ---------- crates/curp/src/client/keep_alive.rs | 16 ++-------- crates/curp/src/client/retry.rs | 24 -------------- crates/curp/src/client/unary/mod.rs | 27 ++-------------- crates/curp/src/log_entry/entry_data.rs | 11 +------ crates/curp/src/rpc/connect/mod.rs | 38 ++--------------------- crates/curp/src/rpc/mod.rs | 28 ----------------- 
crates/curp/src/rpc/reconnect.rs | 15 ++------- crates/curp/src/server/cmd_worker/mod.rs | 4 --- crates/curp/src/server/curp_node/mod.rs | 22 +++---------- crates/curp/src/server/mod.rs | 14 --------- crates/curp/src/server/raw_curp/mod.rs | 34 +------------------- crates/curp/tests/it/common/curp_group.rs | 2 -- crates/xline/src/server/auth_wrapper.rs | 14 ++------- 15 files changed, 22 insertions(+), 246 deletions(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 90e2b55d8..4cb05f81a 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 90e2b55d881b40cf28b2f69ae0e1424bbdc89416 +Subproject commit 4cb05f81af407874fd31a322f3a09bd8a5118509 diff --git a/crates/curp/src/client/connect.rs b/crates/curp/src/client/connect.rs index e0d9e0c29..9fcfb7741 100644 --- a/crates/curp/src/client/connect.rs +++ b/crates/curp/src/client/connect.rs @@ -36,14 +36,6 @@ pub trait ClientApi { /// Send propose to shutdown cluster async fn propose_shutdown(&self) -> Result<(), Self::Error>; - /// Send propose to publish a node id and name - async fn propose_publish( - &self, - node_id: ServerId, - node_name: String, - node_client_urls: Vec, - ) -> Result<(), Self::Error>; - /// Send move leader request async fn move_leader(&self, node_id: ServerId) -> Result<(), Self::Error>; @@ -102,15 +94,6 @@ pub(crate) trait RepeatableClientApi { /// Send propose to shutdown cluster async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error>; - /// Send propose to publish a node id and name - async fn propose_publish( - &self, - node_id: ServerId, - node_name: String, - node_client_urls: Vec, - ctx: Context, - ) -> Result<(), Self::Error>; - /// Send move leader request async fn move_leader(&self, node_id: u64, ctx: Context) -> Result<(), Self::Error>; diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 60aab646b..31b62b470 100644 --- a/crates/curp/src/client/keep_alive.rs +++ 
b/crates/curp/src/client/keep_alive.rs @@ -159,10 +159,9 @@ mod tests { AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, CurpError, FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, Member, MoveLeaderRequest, MoveLeaderResponse, OpResponse, - ProposeId, ProposeRequest, ProposeResponse, PublishRequest, PublishResponse, - ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, - RemoveLearnerResponse, RemoveMemberRequest, RemoveMemberResponse, ResponseOp, - ShutdownRequest, ShutdownResponse, SyncedResponse, + ProposeId, ProposeRequest, ProposeResponse, ReadIndexResponse, RecordRequest, + RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, RemoveMemberRequest, + RemoveMemberResponse, ResponseOp, ShutdownRequest, ShutdownResponse, SyncedResponse, }, }; @@ -214,15 +213,6 @@ mod tests { unreachable!("please use MockedConnectApi") } - /// Send `PublishRequest` - async fn publish( - &self, - _request: PublishRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - /// Send `ShutdownRequest` async fn shutdown( &self, diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 1d0469527..962887915 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -406,30 +406,6 @@ where .await } - /// Send propose to publish a node id and name - async fn propose_publish( - &self, - node_id: ServerId, - node_name: String, - node_client_urls: Vec, - ) -> Result<(), Self::Error> { - self.retry::<_, _>(|client, ctx| { - let name_c = node_name.clone(); - let node_client_urls_c = node_client_urls.clone(); - async move { - RepeatableClientApi::propose_publish( - client, - node_id, - name_c, - node_client_urls_c, - ctx, - ) - .await - } - }) - .await - } - /// Send move leader request async fn move_leader(&self, node_id: u64) -> Result<(), Self::Error> { self.retry::<_, _>(|client, ctx| 
client.move_leader(node_id, ctx)) diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 954d95773..9f20f1374 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -12,12 +12,9 @@ use super::{ connect::{ProposeResponse, RepeatableClientApi}, retry::Context, }; -use crate::{ - members::ServerId, - rpc::{ - AddLearnerRequest, AddMemberRequest, CurpError, FetchReadStateRequest, MoveLeaderRequest, - PublishRequest, ReadState, RemoveLearnerRequest, RemoveMemberRequest, ShutdownRequest, - }, +use crate::rpc::{ + AddLearnerRequest, AddMemberRequest, CurpError, FetchReadStateRequest, MoveLeaderRequest, + ReadState, RemoveLearnerRequest, RemoveMemberRequest, ShutdownRequest, }; /// The unary client @@ -76,24 +73,6 @@ impl RepeatableClientApi for Unary { Ok(()) } - /// Send propose to publish a node id and name - async fn propose_publish( - &self, - node_id: ServerId, - node_name: String, - node_client_urls: Vec, - ctx: Context, - ) -> Result<(), Self::Error> { - let req = PublishRequest::new(ctx.propose_id(), node_id, node_name, node_client_urls); - let timeout = self.config.wait_synced_timeout(); - let _resp = ctx - .cluster_state() - .map_leader(|conn| async move { conn.publish(req, timeout).await }) - .await?; - - Ok(()) - } - /// Send move leader request async fn move_leader(&self, node_id: u64, ctx: Context) -> Result<(), Self::Error> { let req = MoveLeaderRequest::new(node_id, 0); diff --git a/crates/curp/src/log_entry/entry_data.rs b/crates/curp/src/log_entry/entry_data.rs index 8eed4cfd3..c255d09cd 100644 --- a/crates/curp/src/log_entry/entry_data.rs +++ b/crates/curp/src/log_entry/entry_data.rs @@ -4,9 +4,8 @@ use serde::Deserialize; use serde::Serialize; use crate::member::Membership; -use crate::members::ServerId; -use crate::rpc::PublishRequest; +#[allow(variant_size_differences)] // The `Membership` won't be too large /// Entry data of a `LogEntry` #[derive(Debug, Clone, Serialize, 
Deserialize)] #[cfg_attr(test, derive(PartialEq))] @@ -17,8 +16,6 @@ pub(crate) enum EntryData { Command(Arc), /// `Shutdown` entry Shutdown, - /// `SetNodeState` entry - SetNodeState(ServerId, String, Vec), /// `Member` entry Member(Membership), } @@ -29,12 +26,6 @@ impl From> for EntryData { } } -impl From for EntryData { - fn from(value: PublishRequest) -> Self { - EntryData::SetNodeState(value.node_id, value.name, value.client_urls) - } -} - impl From for EntryData { fn from(value: Membership) -> Self { EntryData::Member(value) diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index f7f7f288a..42f165694 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -40,9 +40,9 @@ use crate::{ AddMemberRequest, AddMemberResponse, AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, - ProposeRequest, Protocol, PublishRequest, PublishResponse, RemoveMemberRequest, - RemoveMemberResponse, ShutdownRequest, ShutdownResponse, TriggerShutdownRequest, - TryBecomeLeaderNowRequest, VoteRequest, VoteResponse, + ProposeRequest, Protocol, RemoveMemberRequest, RemoveMemberResponse, ShutdownRequest, + ShutdownResponse, TriggerShutdownRequest, TryBecomeLeaderNowRequest, VoteRequest, + VoteResponse, }, server::StreamingProtocol, snapshot::Snapshot, @@ -193,13 +193,6 @@ pub(crate) trait ConnectApi: Send + Sync + 'static { timeout: Duration, ) -> Result, CurpError>; - /// Send `PublishRequest` - async fn publish( - &self, - request: PublishRequest, - timeout: Duration, - ) -> Result, CurpError>; - /// Send `ShutdownRequest` async fn shutdown( &self, @@ -479,19 +472,6 @@ impl ConnectApi for Connect> { with_timeout!(timeout, client.shutdown(req)).map_err(Into::into) } - /// Send `PublishRequest` - #[instrument(skip(self), name = "client publish")] - async fn 
publish( - &self, - request: PublishRequest, - timeout: Duration, - ) -> Result, CurpError> { - let mut client = self.rpc_connect.clone(); - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_current(); - with_timeout!(timeout, client.publish(req)).map_err(Into::into) - } - /// Send `FetchReadStateRequest` async fn fetch_read_state( &self, @@ -790,18 +770,6 @@ where self.server.read_index(req).await.map_err(Into::into) } - /// Send `PublishRequest` - async fn publish( - &self, - request: PublishRequest, - _timeout: Duration, - ) -> Result, CurpError> { - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_bypassed(); - req.metadata_mut().inject_current(); - self.server.publish(req).await.map_err(Into::into) - } - /// Send `ShutdownRequest` async fn shutdown( &self, diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index d7c51709a..fa18d4f83 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -45,8 +45,6 @@ pub use self::proto::{ ProposeId as PbProposeId, ProposeRequest, ProposeResponse, - PublishRequest, - PublishResponse, QuorumSet, ReadIndexRequest, ReadIndexResponse, @@ -461,32 +459,6 @@ impl MoveLeaderRequest { } } -impl PublishRequest { - /// Create a new `PublishRequest` - pub(crate) fn new( - id: ProposeId, - node_id: ServerId, - name: String, - client_urls: Vec, - ) -> Self { - Self { - propose_id: Some(id.into()), - node_id, - name, - client_urls, - } - } - - /// Get id of the request - pub(crate) fn propose_id(&self) -> ProposeId { - self.propose_id - .unwrap_or_else(|| { - unreachable!("propose id should be set in propose conf change request") - }) - .into() - } -} - #[allow(unused)] // TODO: Use the error handling methods /// NOTICE: /// diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs index 09de8fc24..2d6843c28 100644 --- a/crates/curp/src/rpc/reconnect.rs +++ b/crates/curp/src/rpc/reconnect.rs @@ -10,9 +10,9 @@ use crate::{ 
connect::ConnectApi, AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, CurpError, FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, - OpResponse, ProposeRequest, PublishRequest, PublishResponse, ReadIndexResponse, - RecordRequest, RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, - RemoveMemberRequest, RemoveMemberResponse, ShutdownRequest, ShutdownResponse, + OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, RecordResponse, + RemoveLearnerRequest, RemoveLearnerResponse, RemoveMemberRequest, RemoveMemberResponse, + ShutdownRequest, ShutdownResponse, }, }; @@ -119,15 +119,6 @@ impl ConnectApi for Reconnect { execute_with_reconnect!(self, ConnectApi::read_index, timeout) } - /// Send `PublishRequest` - async fn publish( - &self, - request: PublishRequest, - timeout: Duration, - ) -> Result, CurpError> { - execute_with_reconnect!(self, ConnectApi::publish, request, timeout) - } - /// Send `ShutdownRequest` async fn shutdown( &self, diff --git a/crates/curp/src/server/cmd_worker/mod.rs b/crates/curp/src/server/cmd_worker/mod.rs index f271e9e24..90e7c8475 100644 --- a/crates/curp/src/server/cmd_worker/mod.rs +++ b/crates/curp/src/server/cmd_worker/mod.rs @@ -165,10 +165,6 @@ fn after_sync_others, RC: RoleChange>( } cb.write().notify_shutdown(); } - #[allow(clippy::todo)] // FIXME: Remove this - (EntryData::SetNodeState(..), _) => { - todo!() - } // The no-op command has been applied to state machine (EntryData::Empty, _) => curp.set_no_op_applied(), (EntryData::Member(_), _) => {} diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 89f98d0a5..232b27b7b 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -50,11 +50,10 @@ use crate::{ AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchMembershipRequest, FetchMembershipResponse, 
FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, - MoveLeaderResponse, Node, PoolEntry, ProposeId, ProposeRequest, ProposeResponse, - PublishRequest, PublishResponse, QuorumSet, ReadIndexResponse, RecordRequest, - RecordResponse, ShutdownRequest, ShutdownResponse, SyncedResponse, TriggerShutdownRequest, - TriggerShutdownResponse, TryBecomeLeaderNowRequest, TryBecomeLeaderNowResponse, - VoteRequest, VoteResponse, + MoveLeaderResponse, Node, PoolEntry, ProposeId, ProposeRequest, ProposeResponse, QuorumSet, + ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, + SyncedResponse, TriggerShutdownRequest, TriggerShutdownResponse, TryBecomeLeaderNowRequest, + TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, }, server::{ cmd_worker::{after_sync, worker_reset, worker_snapshot}, @@ -345,19 +344,6 @@ impl, RC: RoleChange> CurpNode { Ok(ShutdownResponse::default()) } - /// Handle `Publish` requests - pub(super) fn publish( - &self, - req: PublishRequest, - bypassed: bool, - ) -> Result { - if bypassed { - self.curp.mark_client_id_bypassed(req.propose_id().0); - } - self.curp.handle_publish(req)?; - Ok(PublishResponse::default()) - } - /// Handle lease keep alive requests pub(super) async fn lease_keep_alive( &self, diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index c5d40eaf5..befe47518 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -40,8 +40,6 @@ use crate::rpc::MoveLeaderRequest; use crate::rpc::MoveLeaderResponse; use crate::rpc::OpResponse; use crate::rpc::ProposeRequest; -use crate::rpc::PublishRequest; -use crate::rpc::PublishResponse; use crate::rpc::ReadIndexRequest; use crate::rpc::ReadIndexResponse; use crate::rpc::RecordRequest; @@ -156,18 +154,6 @@ impl, RC: RoleChange> crate::rpc::Protocol fo )) } - #[instrument(skip_all, name = "curp_publish")] - async fn publish( - &self, - 
request: tonic::Request, - ) -> Result, tonic::Status> { - let bypassed = request.metadata().is_bypassed(); - request.metadata().extract_span(); - Ok(tonic::Response::new( - self.inner.publish(request.into_inner(), bypassed)?, - )) - } - #[instrument(skip_all, name = "curp_fetch_read_state")] async fn fetch_read_state( &self, diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index d6aa3688f..de82d2ef4 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -78,7 +78,6 @@ use crate::rpc::CurpError; use crate::rpc::IdSet; use crate::rpc::PoolEntry; use crate::rpc::ProposeId; -use crate::rpc::PublishRequest; use crate::rpc::ReadState; use crate::rpc::Redirect; use crate::server::cmd_board::CmdBoardRef; @@ -670,34 +669,6 @@ impl RawCurp { Ok(()) } - /// Handle `publish` request - pub(super) fn handle_publish(&self, req: PublishRequest) -> Result<(), CurpError> { - debug!( - "{} gets publish with propose id {}", - self.id(), - req.propose_id() - ); - let st_r = self.st.read(); - if st_r.role != Role::Leader { - return Err(CurpError::redirect(st_r.leader_id, st_r.term)); - } - if self.lst.get_transferee().is_some() { - return Err(CurpError::leader_transfer("leader transferring")); - } - - self.deduplicate(req.propose_id(), None)?; - - let mut log_w = self.log.write(); - let entry = log_w.push(st_r.term, req.propose_id(), req); - debug!("{} gets new log[{}]", self.id(), entry.index); - self.entry_process_single(&mut log_w, entry.as_ref(), false, st_r.term); - - let log_r = RwLockWriteGuard::downgrade(log_w); - self.persistent_log_entries(&[entry.as_ref()], &log_r); - - Ok(()) - } - /// Handle `lease_keep_alive` message pub(super) fn handle_lease_keep_alive(&self, client_id: u64) -> Option { let mut lm_w = self.ctx.lm.write(); @@ -1692,10 +1663,7 @@ impl RawCurp { EntryData::Command(ref cmd) => { let _ignore = ucp_l.insert(&PoolEntry::new(propose_id, Arc::clone(cmd))); } - 
EntryData::Shutdown - | EntryData::Empty - | EntryData::SetNodeState(_, _, _) - | EntryData::Member(_) => {} + EntryData::Shutdown | EntryData::Empty | EntryData::Member(_) => {} } } } diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index 2eeee176a..d39d29e2c 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -319,8 +319,6 @@ impl CurpGroup { task_manager, }, ); - let client = self.new_client().await; - client.propose_publish(id, name, vec![]).await.unwrap(); } pub fn all_addrs(&self) -> impl Iterator { diff --git a/crates/xline/src/server/auth_wrapper.rs b/crates/xline/src/server/auth_wrapper.rs index 87a14fc56..72c3bba6a 100644 --- a/crates/xline/src/server/auth_wrapper.rs +++ b/crates/xline/src/server/auth_wrapper.rs @@ -6,10 +6,9 @@ use curp::{ AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, - OpResponse, ProposeRequest, Protocol, PublishRequest, PublishResponse, ReadIndexRequest, - ReadIndexResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, - RemoveLearnerResponse, RemoveMemberRequest, RemoveMemberResponse, ShutdownRequest, - ShutdownResponse, + OpResponse, ProposeRequest, Protocol, ReadIndexRequest, ReadIndexResponse, RecordRequest, + RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, RemoveMemberRequest, + RemoveMemberResponse, ShutdownRequest, ShutdownResponse, }, }; use flume::r#async::RecvStream; @@ -82,13 +81,6 @@ impl Protocol for AuthWrapper { self.curp_server.shutdown(request).await } - async fn publish( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - self.curp_server.publish(request).await - } - async fn fetch_read_state( &self, request: tonic::Request, From b065c1ffc2f475c9ff962112125bebb4bb5f0c8d Mon Sep 17 00:00:00 2001 
From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 13 Sep 2024 10:35:03 +0800 Subject: [PATCH 172/322] chore: remove unused leader_tx broadcast channel Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 11 +---------- crates/curp/src/server/mod.rs | 9 --------- crates/curp/src/server/raw_curp/mod.rs | 17 ----------------- 3 files changed, 1 insertion(+), 36 deletions(-) diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 232b27b7b..b94f2c428 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -12,10 +12,7 @@ use futures::{pin_mut, stream::FuturesUnordered, Stream, StreamExt}; use madsim::rand::{thread_rng, Rng}; use opentelemetry::KeyValue; use parking_lot::{Mutex, RwLock}; -use tokio::{ - sync::{broadcast, oneshot}, - time::MissedTickBehavior, -}; +use tokio::{sync::oneshot, time::MissedTickBehavior}; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; use tracing::{debug, error, info, trace, warn}; @@ -41,7 +38,6 @@ use crate::{ cmd::{Command, CommandExecutor}, log_entry::{EntryData, LogEntry}, member::{Membership, MembershipInfo}, - members::ServerId, response::ResponseSender, role_change::RoleChange, rpc::{ @@ -967,11 +963,6 @@ impl, RC: RoleChange> CurpNode { None } - /// Get a rx for leader changes - pub(super) fn leader_rx(&self) -> broadcast::Receiver> { - self.curp.leader_rx() - } - /// Send `append_entries` request /// Return `tonic::Error` if meet network issue /// Return (`leader_retires`, `ae_succeed`) diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index befe47518..69b289cab 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -4,7 +4,6 @@ use std::sync::Arc; use engine::SnapshotAllocator; use flume::r#async::RecvStream; use futures::{Stream, StreamExt}; -use tokio::sync::broadcast; #[cfg(not(madsim))] use 
tonic::transport::ClientTlsConfig; use tracing::instrument; @@ -21,7 +20,6 @@ pub use self::raw_curp::RawCurp; use crate::cmd::Command; use crate::cmd::CommandExecutor; use crate::member::MembershipInfo; -use crate::members::ServerId; use crate::response::ResponseSender; use crate::role_change::RoleChange; use crate::rpc::connect::Bypass; @@ -431,13 +429,6 @@ impl, RC: RoleChange> Rpc { Ok(()) } - /// Get a subscriber for leader changes - #[inline] - #[must_use] - pub fn leader_rx(&self) -> broadcast::Receiver> { - self.inner.leader_rx() - } - /// Get raw curp #[inline] #[must_use] diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index de82d2ef4..cf0959635 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -33,7 +33,6 @@ use parking_lot::Mutex; use parking_lot::RwLock; use parking_lot::RwLockUpgradableReadGuard; use parking_lot::RwLockWriteGuard; -use tokio::sync::broadcast; use tokio::sync::oneshot; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; @@ -334,9 +333,6 @@ struct Context { cb: CmdBoardRef, /// The lease manager lm: LeaseManagerRef, - /// Tx to send leader changes - #[builder(setter(skip))] - leader_tx: broadcast::Sender>, /// Election tick #[builder(setter(skip))] election_tick: AtomicU8, @@ -389,7 +385,6 @@ impl ContextBuilder { Some(value) => value, None => return Err(ContextBuilderError::UninitializedField("lm")), }, - leader_tx: broadcast::channel(1).0, election_tick: AtomicU8::new(0), sync_events: match self.sync_events.take() { Some(value) => value, @@ -445,7 +440,6 @@ impl Debug for Context { f.debug_struct("Context") .field("cfg", &self.cfg) .field("cb", &self.cb) - .field("leader_tx", &self.leader_tx) .field("election_tick", &self.election_tick) .field("cmd_tx", &"CEEventTxApi") .field("sync_events", &self.sync_events) @@ -722,13 +716,11 @@ impl RawCurp { let mut st_w = RwLockUpgradableReadGuard::upgrade(st_r); 
self.update_to_term_and_become_follower(&mut st_w, term); st_w.leader_id = Some(leader_id); - let _ig = self.ctx.leader_tx.send(Some(leader_id)).ok(); } std::cmp::Ordering::Equal => { if st_r.leader_id.is_none() { let mut st_w = RwLockUpgradableReadGuard::upgrade(st_r); st_w.leader_id = Some(leader_id); - let _ig = self.ctx.leader_tx.send(Some(leader_id)).ok(); } } std::cmp::Ordering::Greater => { @@ -1204,11 +1196,6 @@ impl RawCurp { self.ms.read().node_id() } - /// Get a rx for leader changes - pub(super) fn leader_rx(&self) -> broadcast::Receiver> { - self.ctx.leader_tx.subscribe() - } - /// Get the effective membership pub(super) fn effective_membership(&self) -> Membership { self.ms.read().cluster().effective().clone() @@ -1436,7 +1423,6 @@ impl RawCurp { st.role = Role::PreCandidate; cst.votes_received = HashMap::from([(self.id(), true)]); st.leader_id = None; - let _ig = self.ctx.leader_tx.send(None).ok(); self.reset_election_tick(); if prev_role == Role::Follower { @@ -1479,7 +1465,6 @@ impl RawCurp { st.role = Role::Candidate; st.voted_for = Some(self.id()); st.leader_id = None; - let _ig = self.ctx.leader_tx.send(None).ok(); self.reset_election_tick(); let self_sp = self.ctx.spec_pool.map_lock(|sp| sp.all()); @@ -1522,7 +1507,6 @@ impl RawCurp { metrics::get().leader_changes.add(1, &[]); st.role = Role::Leader; st.leader_id = Some(self.id()); - let _ig = self.ctx.leader_tx.send(Some(self.id())).ok(); let _ignore = self.ctx.leader_event.notify(usize::MAX); self.ctx.role_change.on_election_win(); debug!("{} becomes the leader", self.id()); @@ -1548,7 +1532,6 @@ impl RawCurp { st.role = Role::Follower; st.voted_for = None; st.leader_id = None; - let _ig = self.ctx.leader_tx.send(None).ok(); st.randomize_timeout_ticks(); // regenerate timeout ticks debug!( "{} updates to term {term} and becomes a follower", From 3677d86c24e1505e87be937be09945229a4283be Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 13 Sep 2024 
14:08:52 +0800 Subject: [PATCH 173/322] refactor: remove `connects` from `RawCurp` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 12 +++--------- crates/curp/src/server/raw_curp/mod.rs | 16 +++++----------- crates/curp/src/server/raw_curp/tests.rs | 17 +---------------- 3 files changed, 9 insertions(+), 36 deletions(-) diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index b94f2c428..9ba48a485 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -566,8 +566,7 @@ impl, RC: RoleChange> CurpNode { if should_send_try_become_leader_now { if let Err(e) = self .curp - .connects() - .get(&req.node_id) + .map_connects(|conns| conns.get(&req.node_id).cloned()) .unwrap_or_else(|| unreachable!("connect to {} should exist", req.node_id)) .try_become_leader_now(self.curp.cfg().rpc_timeout) .await @@ -783,8 +782,6 @@ impl, RC: RoleChange> CurpNode { .into_iter() .map(|(id, addr)| (id, vec![addr])) .collect(); - let connects = - rpc::inner_connects(peer_addrs.clone(), client_tls_config.as_ref()).collect(); let member_connects = rpc::inner_connects(peer_addrs, client_tls_config.as_ref()).collect(); let cmd_board = Arc::new(RwLock::new(CommandBoard::new())); let lease_manager = Arc::new(RwLock::new(LeaseManager::new())); @@ -807,7 +804,6 @@ impl, RC: RoleChange> CurpNode { .remove_events(remove_events) .role_change(role_change) .task_manager(Arc::clone(&task_manager)) - .connects(connects) .last_applied(last_applied) .voted_for(voted_for) .entries(entries) @@ -1143,10 +1139,7 @@ impl, RC: RoleChange> Debug for CurpNode { sync_events: DashMap>, /// Followers remove event trigger remove_events: Arc>>>, - /// Connects of peers - connects: DashMap, /// curp storage curp_storage: Arc>, /// client tls config @@ -188,7 +186,6 @@ impl RawCurpBuilder { .sync_events(args.sync_events) .remove_events(args.remove_events) 
.role_change(args.role_change) - .connects(args.connects) .curp_storage(args.curp_storage) .client_tls_config(args.client_tls_config) .spec_pool(args.spec_pool) @@ -345,8 +342,6 @@ struct Context { leader_event: Arc, /// Leader change callback role_change: RC, - /// Connects of peers - connects: DashMap, /// Curp storage curp_storage: Arc>, /// Speculative pool @@ -399,10 +394,6 @@ impl ContextBuilder { Some(value) => value, None => return Err(ContextBuilderError::UninitializedField("role_change")), }, - connects: match self.connects.take() { - Some(value) => value, - None => return Err(ContextBuilderError::UninitializedField("connects")), - }, curp_storage: match self.curp_storage.take() { Some(value) => value, None => return Err(ContextBuilderError::UninitializedField("curp_storage")), @@ -1351,8 +1342,11 @@ impl RawCurp { } /// Get all connects - pub(super) fn connects(&self) -> &DashMap { - &self.ctx.connects + pub(super) fn map_connects(&self, mut f: F) -> R + where + F: FnMut(&BTreeMap) -> R, + { + self.ms.map_read(|ms| f(ms.connects())) } /// Get all connects diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 08c388cb9..db502aa3e 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -9,7 +9,7 @@ use utils::config::{ use super::*; use crate::{ - rpc::{connect::MockInnerConnectApi, Redirect}, + rpc::Redirect, server::{ cmd_board::CommandBoard, conflict::test_pools::{TestSpecPool, TestUncomPool}, @@ -46,15 +46,6 @@ impl RawCurp { .into_iter() .map(|id| (id, Arc::new(Event::new()))) .collect(); - let connects = peer_ids - .into_iter() - .map(|id| { - ( - id, - InnerConnectApiWrapper::new_from_arc(Arc::new(MockInnerConnectApi::new())), - ) - }) - .collect(); let curp_config = CurpConfigBuilder::default() .log_entries_cap(10) .build() @@ -84,7 +75,6 @@ impl RawCurp { .sync_events(sync_events) .role_change(role_change) .task_manager(task_manager) - 
.connects(connects) .curp_storage(curp_storage) .spec_pool(sp) .uncommitted_pool(ucp) @@ -95,11 +85,6 @@ impl RawCurp { .unwrap() } - /// Set connect for a server - pub(crate) fn set_connect(&self, id: ServerId, connect: InnerConnectApiWrapper) { - self.ctx.connects.entry(id).and_modify(|c| *c = connect); - } - pub(crate) fn tracker(&self, client_id: u64) -> Tracker { self.ctx .cb From f77e55e2518d490be692362296afaf27865e99c5 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:35:27 +0800 Subject: [PATCH 174/322] refactor: rewrite FollowerStatus Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 9 +++ crates/curp/src/server/raw_curp/mod.rs | 5 +- crates/curp/src/server/raw_curp/state.rs | 85 +++++++++++++----------- 3 files changed, 57 insertions(+), 42 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 70951bf0f..9259647a5 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -107,6 +107,15 @@ impl NodeMembershipState { self.cluster().effective().contains(node_id) } + /// Returns all member ids + pub(crate) fn members_ids(&self) -> BTreeSet { + self.cluster() + .effective() + .members() + .map(|(id, _)| id) + .collect() + } + /// Updates the connects /// /// Returns a pair of (removed, added) connects diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index c35eb9568..7e0583018 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1549,10 +1549,11 @@ impl RawCurp { return false; } + let member_ids = self.ms.map_read(|ms| ms.members_ids()); let replicated_ids: Vec<_> = self .lst - .iter() - .filter_map(|f| (!f.is_learner && f.match_index >= i).then_some(*f.key())) + .map_status(|(id, f)| (member_ids.contains(id) && f.match_index >= i).then_some(*id)) + .flatten() .chain(iter::once(self.node_id())) .collect(); diff --git 
a/crates/curp/src/server/raw_curp/state.rs b/crates/curp/src/server/raw_curp/state.rs index 722187d0f..654df63fa 100644 --- a/crates/curp/src/server/raw_curp/state.rs +++ b/crates/curp/src/server/raw_curp/state.rs @@ -4,17 +4,12 @@ use std::{ sync::atomic::{AtomicBool, AtomicU64, Ordering}, }; -use dashmap::{ - mapref::{ - multiple::RefMulti, - one::{Ref, RefMut}, - }, - DashMap, -}; use event_listener::Event; use futures::{future, Future}; use madsim::rand::{thread_rng, Rng}; +use parking_lot::RwLock; use tracing::{debug, warn}; +use utils::parking_lot_lock::RwLockMap; use super::Role; use crate::{members::ServerId, rpc::PoolEntry, LogIndex}; @@ -54,23 +49,20 @@ pub(super) struct CandidateState { pub(super) votes_received: HashMap, } -/// Status of a follower +/// Status of a Node #[derive(Debug, Copy, Clone)] -pub(super) struct FollowerStatus { - /// Index of the next log entry to send to that follower +pub(super) struct NodeStatus { + /// Index of the next log entry to send to that node pub(super) next_index: LogIndex, - /// Index of highest log entry known to be replicated on that follower + /// Index of highest log entry known to be replicated on that node pub(super) match_index: LogIndex, - /// This node is a learner or not - pub(super) is_learner: bool, } -impl Default for FollowerStatus { +impl Default for NodeStatus { fn default() -> Self { Self { next_index: 1, match_index: 0, - is_learner: false, } } } @@ -79,7 +71,7 @@ impl Default for FollowerStatus { #[derive(Debug)] pub(super) struct LeaderState { /// For each server, the leader maintains its status - statuses: DashMap, + statuses: RwLock>, /// Leader Transferee leader_transferee: AtomicU64, /// Event of the application of the no-op log, used for readIndex @@ -149,62 +141,75 @@ impl LeaderState { where I: IntoIterator, { + let statuses = others + .into_iter() + .map(|o| (o, NodeStatus::default())) + .collect(); + Self { - statuses: others - .into_iter() - .map(|o| (o, FollowerStatus::default())) - 
.collect(), + statuses: RwLock::new(statuses), leader_transferee: AtomicU64::new(0), no_op_state: NoOpState::default(), } } /// Get status for a server - fn get_status(&self, id: ServerId) -> Option> { - self.statuses.get(&id) + fn map_status_with_id(&self, id: ServerId, f: F) -> Option + where + F: FnMut(&NodeStatus) -> R, + { + self.statuses.map_read(|statuses| statuses.get(&id).map(f)) } /// Get status for a server - fn get_status_mut(&self, id: ServerId) -> Option> { - self.statuses.get_mut(&id) + fn map_status_with_id_mut(&self, id: ServerId, f: F) -> Option + where + F: FnMut(&mut NodeStatus) -> R, + { + self.statuses + .map_write(|mut statuses| statuses.get_mut(&id).map(f)) } /// Get `next_index` for server pub(super) fn get_next_index(&self, id: ServerId) -> Option { - self.get_status(id).map(|s| s.next_index) + self.map_status_with_id(id, |s| s.next_index) } /// Get `match_index` for server pub(super) fn get_match_index(&self, id: ServerId) -> Option { - self.get_status(id).map(|s| s.match_index) + self.map_status_with_id(id, |s| s.match_index) } /// Update `next_index` for server pub(super) fn update_next_index(&self, id: ServerId, index: LogIndex) { - let Some(mut status) = self.get_status_mut(id) else { + let opt = self.map_status_with_id_mut(id, |status| status.next_index = index); + if opt.is_none() { warn!("follower {} is not found, it maybe has been removed", id); - return; - }; - status.next_index = index; + } } /// Update `match_index` for server, will update `next_index` if possible pub(super) fn update_match_index(&self, id: ServerId, index: LogIndex) { - let Some(mut status) = self.get_status_mut(id) else { + let opt = self.map_status_with_id_mut(id, |status| { + if status.match_index >= index { + return; + } + status.match_index = index; + status.next_index = index + 1; + debug!("follower {id}'s match_index updated to {index}"); + }); + if opt.is_none() { warn!("follower {} is not found, it maybe has been removed", id); - return; }; - if 
status.match_index >= index { - return; - } - status.match_index = index; - status.next_index = index + 1; - debug!("follower {id}'s match_index updated to {index}"); } /// Create a `Iterator` for all statuses - pub(super) fn iter(&self) -> impl Iterator> { - self.statuses.iter() + pub(super) fn map_status(&self, f: F) -> impl Iterator + where + F: FnMut((&u64, &NodeStatus)) -> R, + { + self.statuses + .map_read(|status| status.iter().map(f).collect::>().into_iter()) } /// Get transferee From 055ba6f70f6301e5c5b48c6d283eea06ad4d149d Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sat, 14 Sep 2024 10:36:35 +0800 Subject: [PATCH 175/322] refactor: merge all states of nodes Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 64 +---- crates/curp/src/rpc/connect/mod.rs | 6 +- crates/curp/src/server/curp_node/mod.rs | 22 +- .../curp/src/server/raw_curp/member_impl.rs | 44 ++-- crates/curp/src/server/raw_curp/mod.rs | 135 +++++----- crates/curp/src/server/raw_curp/node_state.rs | 241 ++++++++++++++++++ crates/curp/src/server/raw_curp/state.rs | 75 +----- crates/curp/src/server/raw_curp/tests.rs | 14 +- 8 files changed, 333 insertions(+), 268 deletions(-) create mode 100644 crates/curp/src/server/raw_curp/node_state.rs diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 9259647a5..3d5ce7fc7 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -6,7 +6,6 @@ use std::collections::HashSet; use std::hash::Hash; use std::hash::Hasher; use std::iter; -use std::sync::Arc; use curp_external_api::LogIndex; use serde::Deserialize; @@ -14,8 +13,6 @@ use serde::Serialize; use crate::quorum::Joint; use crate::quorum::QuorumSet; -use crate::rpc::connect::InnerConnectApi; -use crate::rpc::connect::InnerConnectApiWrapper; /// The membership info, used to build the initial states #[derive(Debug, Clone)] @@ -57,17 +54,11 @@ pub(crate) struct NodeMembershipState { 
node_id: u64, /// The membership state of the cluster cluster_state: MembershipState, - #[allow(unused)] - /// The rpc connects of nodes - connects: BTreeMap, } impl NodeMembershipState { /// Creates a new `NodeMembershipState` with initial state - pub(crate) fn new( - info: MembershipInfo, - init_connects: BTreeMap, - ) -> Self { + pub(crate) fn new(info: MembershipInfo) -> Self { let node_id = info.node_id; let init_ms = info.into_membership(); let cluster_state = MembershipState { @@ -78,7 +69,6 @@ impl NodeMembershipState { Self { node_id, cluster_state, - connects: init_connects, } } @@ -116,58 +106,6 @@ impl NodeMembershipState { .collect() } - /// Updates the connects - /// - /// Returns a pair of (removed, added) connects - pub(crate) fn update_connects( - &mut self, - new_connects: &BTreeMap, - ) -> ( - BTreeMap, - BTreeMap, - ) { - /// Alias - type Map = BTreeMap; - let diff = |x: &Map, y: &Map| { - x.iter() - .filter_map(|(k, c)| (!y.contains_key(k)).then_some((*k, c.clone()))) - .collect::>() - }; - let removed = diff(&self.connects, new_connects); - let added = diff(new_connects, &self.connects); - - for k in removed.keys() { - let _ignore = self.connects.remove(k); - } - self.connects.extend(added.clone()); - - (removed, added) - } - - #[allow(unused)] - /// Get all rpc connects - pub(crate) fn connects(&self) -> &BTreeMap { - &self.connects - } - - /// Get all voter connects - pub(crate) fn voter_connects(&self) -> BTreeMap> { - self.cluster() - .effective() - .members() - .map(|(id, _)| { - ( - id, - Arc::clone( - self.connects - .get(&id) - .unwrap_or_else(|| unreachable!("connect should always exist")), - ), - ) - }) - .collect() - } - /// Returns `true` if the given set of nodes forms a quorum pub(crate) fn check_quorum(&self, nodes: I, mut expect_quorum: Q) -> bool where diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index 42f165694..e4e4381f7 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ 
b/crates/curp/src/rpc/connect/mod.rs @@ -302,8 +302,12 @@ impl InnerConnectApiWrapper { pub(crate) fn new_from_arc(connect: Arc) -> Self { Self(connect) } -} + /// Consume the wrapper and return the inner `Arc` + pub(crate) fn into_inner(self) -> Arc { + self.0 + } +} impl Debug for InnerConnectApiWrapper { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("InnerConnectApiWrapper").finish() diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 9ba48a485..57f689a13 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -566,7 +566,8 @@ impl, RC: RoleChange> CurpNode { if should_send_try_become_leader_now { if let Err(e) = self .curp - .map_connects(|conns| conns.get(&req.node_id).cloned()) + .connects(Some(&req.node_id)) + .next() .unwrap_or_else(|| unreachable!("connect to {} should exist", req.node_id)) .try_become_leader_now(self.curp.cfg().rpc_timeout) .await @@ -763,19 +764,6 @@ impl, RC: RoleChange> CurpNode { sps: Vec>, ucps: Vec>, ) -> Result { - let sync_events = membership_info - .init_members - .keys() - .map(|id| (*id, Arc::new(Event::new()))) - .collect(); - let remove_events = Arc::new(Mutex::new( - membership_info - .init_members - .keys() - .map(|id| (*id, Arc::new(Event::new()))) - .collect(), - )); - let peer_addrs: HashMap<_, _> = membership_info .init_members .clone() @@ -800,8 +788,6 @@ impl, RC: RoleChange> CurpNode { .cmd_board(Arc::clone(&cmd_board)) .lease_manager(Arc::clone(&lease_manager)) .cfg(Arc::clone(&curp_cfg)) - .sync_events(sync_events) - .remove_events(remove_events) .role_change(role_change) .task_manager(Arc::clone(&task_manager)) .last_applied(last_applied) @@ -865,9 +851,7 @@ impl, RC: RoleChange> CurpNode { curp.with_member_connects(|connects| { for c in connects.values() { - let sync_event = curp.sync_event(c.id()); - let remove_event = curp.remove_event(c.id()); - + let (sync_event, remove_event) = 
curp.events(c.id()); task_manager.spawn(TaskName::SyncFollower, |n| { Self::sync_follower_task( Arc::clone(&curp), diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 9ffc19e92..5f7dd1e54 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -1,4 +1,5 @@ use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::sync::Arc; use curp_external_api::cmd::Command; @@ -21,6 +22,8 @@ use crate::server::StorageError; use super::RawCurp; use super::Role; +impl RawCurp {} + impl RawCurp { /// Generates new node ids /// TODO: makes sure that the ids are unique @@ -51,13 +54,11 @@ impl RawCurp { let st_r = self.st.read(); let propose_id = ProposeId(rand::random(), 0); let entry = log_w.push(st_r.term, propose_id, config.clone()); - let new_connects = self.build_connects(&config); + self.on_membership_update(&config, &spawn_sync); ms_w.cluster_mut().append(entry.index, config); - let (removed, added) = ms_w.update_connects(&new_connects); self.ctx .curp_storage .put_membership(ms_w.node_id(), ms_w.cluster())?; - self.update_node_sync(removed, added, spawn_sync); Ok(propose_id) } @@ -86,9 +87,7 @@ impl RawCurp { } }); for (index, config) in configs { - let new_connects = self.build_connects(&config); - let (removed, added) = ms_w.update_connects(&new_connects); - self.update_node_sync(removed, added, &spawn_sync); + self.on_membership_update(&config, &spawn_sync); ms_w.cluster_mut().append(index, config); ms_w.cluster_mut().commit(commit_index.min(index)); self.ctx @@ -132,29 +131,22 @@ impl RawCurp { inner_connects(nodes, self.client_tls_config()).collect() } - /// Updates the background task of node sync - fn update_node_sync( - &self, - removed: BTreeMap, - added: BTreeMap, - spawn_sync: F, - ) where + /// Actions on membership update + fn on_membership_update(&self, membership: &Membership, spawn_sync: F) + where F: Fn(Arc, Arc, 
InnerConnectApiWrapper), { - let mut remove_events_l = self.ctx.remove_events.lock(); - for (id, connect) in added { - let sync_event = Arc::new(Event::new()); - let remove_event = Arc::new(Event::new()); - _ = self.ctx.sync_events.insert(id, Arc::new(Event::new())); - let _ignore = remove_events_l.insert(id, Arc::new(Event::new())); + let node_ids: BTreeSet<_> = membership.nodes.keys().copied().collect(); + let new_connects = self.build_connects(membership); + let connect_to = move |ids: &BTreeSet| { + ids.iter() + .filter_map(|id| new_connects.get(id).cloned()) + .collect::>() + }; + let added = self.ctx.node_states.update_with(&node_ids, connect_to); + for state in added.into_values() { + let (_, connect, sync_event, remove_event) = state.clone_parts(); spawn_sync(sync_event, remove_event, connect); } - for (id, _connect) in removed { - _ = self.ctx.sync_events.remove(&id); - assert!( - remove_events_l.remove(&id).map(|e| e.notify(1)).is_some(), - "id doesn't exist" - ); - } } } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 7e0583018..cf66890f9 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -23,7 +23,6 @@ use std::sync::Arc; use clippy_utilities::NumericCast; use clippy_utilities::OverflowArithmetic; -use dashmap::DashMap; use derive_builder::Builder; use event_listener::Event; use futures::Future; @@ -51,6 +50,7 @@ use utils::task_manager::TaskManager; use utils::ClientTlsConfig; use self::log::Log; +use self::node_state::NodeStates; use self::state::CandidateState; use self::state::LeaderState; use self::state::State; @@ -98,6 +98,9 @@ mod tests; /// Membership implementation mod member_impl; +/// Unified state for each node +mod node_state; + /// The curp state machine pub struct RawCurp { /// Curp state @@ -136,10 +139,6 @@ pub(super) struct RawCurpArgs { role_change: RC, /// Task manager task_manager: Arc, - /// Sync events - sync_events: DashMap>, - 
/// Followers remove event trigger - remove_events: Arc>>>, /// curp storage curp_storage: Arc>, /// client tls config @@ -175,7 +174,7 @@ impl RawCurpBuilder { args.cfg.follower_timeout_ticks, args.cfg.candidate_timeout_ticks, )); - let lst = LeaderState::new(args.membership_info.init_members.keys().copied()); + let lst = LeaderState::new(); let cst = Mutex::new(CandidateState::new()); let log = RwLock::new(Log::new(args.cfg.batch_max_size, args.cfg.log_entries_cap)); @@ -183,8 +182,6 @@ impl RawCurpBuilder { .cb(args.cmd_board) .lm(args.lease_manager) .cfg(args.cfg) - .sync_events(args.sync_events) - .remove_events(args.remove_events) .role_change(args.role_change) .curp_storage(args.curp_storage) .client_tls_config(args.client_tls_config) @@ -193,6 +190,9 @@ impl RawCurpBuilder { .as_tx(args.as_tx) .resp_txs(args.resp_txs) .id_barrier(args.id_barrier) + .node_states(Arc::new(NodeStates::new_from_connects( + args.member_connects, + ))) .build() .map_err(|e| match e { ContextBuilderError::UninitializedField(s) => { @@ -208,10 +208,7 @@ impl RawCurpBuilder { log, ctx, task_manager: args.task_manager, - ms: RwLock::new(NodeMembershipState::new( - args.membership_info, - args.member_connects, - )), + ms: RwLock::new(NodeMembershipState::new(args.membership_info)), }; if args.is_leader { @@ -333,10 +330,6 @@ struct Context { /// Election tick #[builder(setter(skip))] election_tick: AtomicU8, - /// Followers sync event trigger - sync_events: DashMap>, - /// Followers remove event trigger - remove_events: Arc>>>, /// Become leader event #[builder(setter(skip))] leader_event: Arc, @@ -355,6 +348,8 @@ struct Context { resp_txs: Arc>>>, /// Barrier for waiting unsynced commands id_barrier: Arc>, + /// States of nodes in the cluster + node_states: Arc, } impl Context { @@ -381,14 +376,6 @@ impl ContextBuilder { None => return Err(ContextBuilderError::UninitializedField("lm")), }, election_tick: AtomicU8::new(0), - sync_events: match self.sync_events.take() { - Some(value) 
=> value, - None => return Err(ContextBuilderError::UninitializedField("sync_events")), - }, - remove_events: match self.remove_events.take() { - Some(value) => value, - None => return Err(ContextBuilderError::UninitializedField("remove_events")), - }, leader_event: Arc::new(Event::new()), role_change: match self.role_change.take() { Some(value) => value, @@ -422,6 +409,10 @@ impl ContextBuilder { Some(value) => value, None => return Err(ContextBuilderError::UninitializedField("id_barrier")), }, + node_states: match self.node_states.take() { + Some(value) => value, + None => return Err(ContextBuilderError::UninitializedField("node_states")), + }, }) } } @@ -433,7 +424,6 @@ impl Debug for Context { .field("cb", &self.cb) .field("election_tick", &self.election_tick) .field("cmd_tx", &"CEEventTxApi") - .field("sync_events", &self.sync_events) .field("leader_event", &self.leader_event) .finish() } @@ -763,7 +753,9 @@ impl RawCurp { } if !success { - self.lst.update_next_index(follower_id, hint_index); + self.ctx + .node_states + .update_next_index(follower_id, hint_index); debug!( "{} updates follower {}'s next_index to {hint_index} because it rejects ae", self.id(), @@ -777,7 +769,9 @@ impl RawCurp { return Ok(true); }; - self.lst.update_match_index(follower_id, last_sent_index); + self.ctx + .node_states + .update_match_index(follower_id, last_sent_index); // check if commit_index needs to be updated let log_r = self.log.upgradable_read(); @@ -973,13 +967,13 @@ impl RawCurp { .members() .filter_map(|(id, _)| (id != ms_r.node_id()).then_some(id)); for other in peers { - self.lst.update_next_index(other, last_log_index + 1); // iter from the end to front is more likely to match the follower + self.ctx + .node_states + .update_next_index(other, last_log_index + 1); // iter from the end to front is more likely to match the follower } if prev_last_log_index < last_log_index { // if some entries are recovered, sync with followers immediately - 
self.ctx.sync_events.iter().for_each(|event| { - let _ignore = event.notify(1); - }); + self.ctx.node_states.notify_sync_events(|_| true); } Ok(true) @@ -1066,7 +1060,8 @@ impl RawCurp { if cur_role != Role::Leader { return Err(()); } - self.lst + self.ctx + .node_states .update_match_index(follower_id, meta.last_included_index.numeric_cast()); Ok(()) } @@ -1116,13 +1111,15 @@ impl RawCurp { } self.reset_election_tick(); let match_index = self - .lst + .ctx + .node_states .get_match_index(target_id) .unwrap_or_else(|| unreachable!("node should exist,checked before")); if match_index == self.log.read().last_log_index() { Ok(true) } else { - let _ignore = self.sync_event(target_id).notify(1); + let (sync_event, _) = self.events(target_id); + let _ignore = sync_event.notify(1); Ok(false) } } @@ -1203,7 +1200,7 @@ impl RawCurp { st_r.term }; - let Some(next_index) = self.lst.get_next_index(follower_id) else { + let Some(next_index) = self.ctx.node_states.get_next_index(follower_id) else { warn!( "follower {} is not found, it maybe has been removed", follower_id @@ -1294,27 +1291,12 @@ impl RawCurp { &self.ctx.uncommitted_pool } - /// Get sync event - pub(super) fn sync_event(&self, id: ServerId) -> Arc { - Arc::clone( - self.ctx - .sync_events - .get(&id) - .unwrap_or_else(|| unreachable!("server id {id} not found")) - .value(), - ) - } - - // TODO: we could directly abort the sync task instead of signal it manually - /// Get remove event - pub(super) fn remove_event(&self, id: ServerId) -> Arc { - Arc::clone( - self.ctx - .remove_events - .lock() - .get(&id) - .unwrap_or_else(|| unreachable!("server id {id} not found")), - ) + /// Get (`sync_event`, `remove_event`) + pub(super) fn events(&self, id: u64) -> (Arc, Arc) { + let t = self.ctx.node_states.clone_events(Some(id)); + t.into_iter() + .next() + .unwrap_or_else(|| unreachable!("server id {id} not found")) } /// Check if the current node is shutting down @@ -1336,17 +1318,18 @@ impl RawCurp { pub(super) fn 
is_synced(&self, node_id: ServerId) -> bool { let log_r = self.log.read(); let leader_commit_index = log_r.commit_index; - self.lst + self.ctx + .node_states .get_match_index(node_id) .is_some_and(|match_index| match_index == leader_commit_index) } - /// Get all connects - pub(super) fn map_connects(&self, mut f: F) -> R - where - F: FnMut(&BTreeMap) -> R, - { - self.ms.map_read(|ms| f(ms.connects())) + /// Get rpc connect connects by ids + pub(super) fn connects<'a, Ids: IntoIterator>( + &self, + ids: Ids, + ) -> impl Iterator { + self.ctx.node_states.connects(ids) } /// Get all connects @@ -1354,13 +1337,18 @@ impl RawCurp { where F: FnMut(&BTreeMap) -> R, { - op(self.ms.read().connects()) + op(&self.ctx.node_states.all_connects()) } /// Get voters connects pub(super) fn voters_connects(&self) -> BTreeMap> { - let ms_r = self.ms.read(); - ms_r.voter_connects() + let voters = self.ms.map_read(|ms| ms.members_ids()); + let connects = self + .ctx + .node_states + .connects(voters.iter()) + .map(InnerConnectApiWrapper::into_inner); + voters.iter().copied().zip(connects).collect() } /// Get transferee @@ -1370,7 +1358,7 @@ impl RawCurp { /// Get match index of a node pub(super) fn get_match_index(&self, id: ServerId) -> Option { - self.lst.get_match_index(id) + self.ctx.node_states.get_match_index(id) } /// Get last log index @@ -1551,7 +1539,8 @@ impl RawCurp { let member_ids = self.ms.map_read(|ms| ms.members_ids()); let replicated_ids: Vec<_> = self - .lst + .ctx + .node_states .map_status(|(id, f)| (member_ids.contains(id) && f.match_index >= i).then_some(*id)) .flatten() .chain(iter::once(self.node_id())) @@ -1686,13 +1675,9 @@ impl RawCurp { /// Notify sync events fn notify_sync_events(&self, log: &Log) { - self.ctx.sync_events.iter().for_each(|e| { - if let Some(next) = self.lst.get_next_index(*e.key()) { - if next > log.base_index && log.has_next_batch(next) { - let _ignore = e.notify(1); - } - } - }); + self.ctx + .node_states + .notify_sync_events(|next| 
next > log.base_index && log.has_next_batch(next)); } /// Update index in single node cluster diff --git a/crates/curp/src/server/raw_curp/node_state.rs b/crates/curp/src/server/raw_curp/node_state.rs new file mode 100644 index 000000000..a712cad0a --- /dev/null +++ b/crates/curp/src/server/raw_curp/node_state.rs @@ -0,0 +1,241 @@ +use std::{ + collections::{BTreeMap, BTreeSet}, + sync::Arc, +}; + +use curp_external_api::LogIndex; +use event_listener::Event; +use parking_lot::RwLock; +use tracing::{debug, info, warn}; + +use crate::rpc::connect::InnerConnectApiWrapper; + +use super::state::NodeStatus; + +/// States of all nodes +#[derive(Debug)] +pub(crate) struct NodeStates { + /// The states + states: RwLock>, +} + +impl NodeStates { + /// Creates a new `NodeStates` + pub(super) fn new_from_connects(connects: Connects) -> Self + where + Connects: IntoIterator, + { + let states = connects + .into_iter() + .map(|(id, conn)| (id, NodeState::new(conn))) + .collect(); + + Self { + states: RwLock::new(states), + } + } + + /// Updates the node states based on the provided set of ids. + /// + /// Returns the newly added node states. 
+ pub(super) fn update_with( + &self, + ids: &BTreeSet, + mut connect_to: ConnectTo, + ) -> BTreeMap + where + ConnectTo: FnMut(&BTreeSet) -> Connects, + Connects: IntoIterator, + { + let mut states_w = self.states.write(); + let old_ids: BTreeSet<_> = states_w.keys().copied().collect(); + let added: BTreeSet<_> = ids.difference(&old_ids).copied().collect(); + let removed: BTreeSet<_> = old_ids.difference(ids).copied().collect(); + states_w.retain(|id, _| !removed.contains(id)); + let new_connects = connect_to(&added); + let new_states: BTreeMap<_, _> = added + .clone() + .into_iter() + .zip(new_connects.into_iter().map(NodeState::new)) + .collect(); + states_w.extend(new_states.clone()); + + info!("added nodes: {added:?}, removed nodes: {removed:?}"); + + new_states + } + + /// Update `next_index` for server + pub(super) fn update_next_index(&self, id: u64, index: LogIndex) { + let mut states_w = self.states.write(); + let opt = states_w + .get_mut(&id) + .map(|state| state.status_mut().next_index = index); + if opt.is_none() { + warn!("follower {} is not found, it maybe has been removed", id); + } + } + + /// Update `match_index` for server, will update `next_index` if possible + pub(super) fn update_match_index(&self, id: u64, index: LogIndex) { + let mut states_w = self.states.write(); + let opt = states_w.get_mut(&id).map(|state| { + let status = state.status_mut(); + if status.match_index >= index { + return; + } + status.match_index = index; + status.next_index = index + 1; + debug!("follower {id}'s match_index updated to {index}"); + }); + if opt.is_none() { + warn!("follower {} is not found, it maybe has been removed", id); + }; + } + /// Get `next_index` for server + pub(super) fn get_next_index(&self, id: u64) -> Option { + let states_r = self.states.read(); + states_r.get(&id).map(|state| state.status().next_index) + } + + /// Get `match_index` for server + pub(super) fn get_match_index(&self, id: u64) -> Option { + let states_r = self.states.read(); + 
states_r.get(&id).map(|state| state.status().match_index) + } + + /// Create a `Iterator` for all statuses + pub(super) fn map_status(&self, f: F) -> impl Iterator + where + F: FnMut((&u64, &NodeStatus)) -> R, + { + let states_r = self.states.read(); + states_r + .keys() + .zip(states_r.values().map(NodeState::status)) + .map(f) + .collect::>() + .into_iter() + } + + /// Clone the references of the events + pub(super) fn clone_events>( + &self, + ids: I, + ) -> Vec<(Arc, Arc)> { + let states_r = self.states.read(); + ids.into_iter() + .filter_map(|id| states_r.get(&id).map(NodeState::close_events)) + .collect() + } + + /// Notify sync events + pub(super) fn notify_sync_events(&self, filter: F) + where + F: Fn(LogIndex) -> bool, + { + let states_r = self.states.read(); + states_r + .values() + .filter(|state| filter(state.status().next_index)) + .for_each(|state| { + let _ignore = state.sync_event().notify(1); + }); + } + + /// Get rpc connect connects by ids + pub(super) fn connects<'a, Ids: IntoIterator>( + &self, + ids: Ids, + ) -> impl Iterator { + let states_r = self.states.read(); + ids.into_iter() + .filter_map(|id| states_r.get(id).map(NodeState::connect).cloned()) + .collect::>() + .into_iter() + } + + /// Get all rpc connects + pub(super) fn all_connects(&self) -> BTreeMap { + let states_r = self.states.read(); + states_r + .keys() + .copied() + .zip(states_r.values().map(NodeState::connect).cloned()) + .collect() + } +} + +/// The state of a node +#[derive(Clone, Debug)] +pub(super) struct NodeState { + /// The status of current node + status: NodeStatus, + /// The connect to the node + connect: InnerConnectApiWrapper, + /// Sync event trigger for a follower + sync_event: Arc, + /// Remove event trigger for a node + remove_event: Arc, +} + +impl NodeState { + /// Creates a new `NodeState` + fn new(connect: InnerConnectApiWrapper) -> Self { + Self { + connect, + status: NodeStatus::default(), + sync_event: Arc::default(), + remove_event: Arc::default(), + 
} + } + + /// Get the status of the current node + pub(super) fn status(&self) -> &NodeStatus { + &self.status + } + + /// Get the connection to the node + pub(super) fn connect(&self) -> &InnerConnectApiWrapper { + &self.connect + } + + /// Clone the references of the events + fn close_events(&self) -> (Arc, Arc) { + (Arc::clone(&self.sync_event), Arc::clone(&self.remove_event)) + } + + /// Get the sync event trigger for a follower + pub(super) fn sync_event(&self) -> &Event { + &self.sync_event + } + + /// Get a mutable reference to the status of the current node + pub(super) fn status_mut(&mut self) -> &mut NodeStatus { + &mut self.status + } + + /// Clone parts of self + pub(super) fn clone_parts( + &self, + ) -> (NodeStatus, InnerConnectApiWrapper, Arc, Arc) { + let NodeState { + ref status, + ref connect, + ref sync_event, + ref remove_event, + } = *self; + ( + *status, + connect.clone(), + Arc::clone(sync_event), + Arc::clone(remove_event), + ) + } +} + +impl Drop for NodeState { + fn drop(&mut self) { + let _ignore = self.remove_event.notify(1); + } +} diff --git a/crates/curp/src/server/raw_curp/state.rs b/crates/curp/src/server/raw_curp/state.rs index 654df63fa..39efd8f16 100644 --- a/crates/curp/src/server/raw_curp/state.rs +++ b/crates/curp/src/server/raw_curp/state.rs @@ -7,9 +7,6 @@ use std::{ use event_listener::Event; use futures::{future, Future}; use madsim::rand::{thread_rng, Rng}; -use parking_lot::RwLock; -use tracing::{debug, warn}; -use utils::parking_lot_lock::RwLockMap; use super::Role; use crate::{members::ServerId, rpc::PoolEntry, LogIndex}; @@ -70,8 +67,6 @@ impl Default for NodeStatus { /// Additional state for the leader, all volatile #[derive(Debug)] pub(super) struct LeaderState { - /// For each server, the leader maintains its status - statuses: RwLock>, /// Leader Transferee leader_transferee: AtomicU64, /// Event of the application of the no-op log, used for readIndex @@ -137,81 +132,13 @@ impl State { impl LeaderState { /// 
Create a `LeaderState` - pub(super) fn new(others: I) -> Self - where - I: IntoIterator, - { - let statuses = others - .into_iter() - .map(|o| (o, NodeStatus::default())) - .collect(); - + pub(super) fn new() -> Self { Self { - statuses: RwLock::new(statuses), leader_transferee: AtomicU64::new(0), no_op_state: NoOpState::default(), } } - /// Get status for a server - fn map_status_with_id(&self, id: ServerId, f: F) -> Option - where - F: FnMut(&NodeStatus) -> R, - { - self.statuses.map_read(|statuses| statuses.get(&id).map(f)) - } - - /// Get status for a server - fn map_status_with_id_mut(&self, id: ServerId, f: F) -> Option - where - F: FnMut(&mut NodeStatus) -> R, - { - self.statuses - .map_write(|mut statuses| statuses.get_mut(&id).map(f)) - } - - /// Get `next_index` for server - pub(super) fn get_next_index(&self, id: ServerId) -> Option { - self.map_status_with_id(id, |s| s.next_index) - } - - /// Get `match_index` for server - pub(super) fn get_match_index(&self, id: ServerId) -> Option { - self.map_status_with_id(id, |s| s.match_index) - } - - /// Update `next_index` for server - pub(super) fn update_next_index(&self, id: ServerId, index: LogIndex) { - let opt = self.map_status_with_id_mut(id, |status| status.next_index = index); - if opt.is_none() { - warn!("follower {} is not found, it maybe has been removed", id); - } - } - - /// Update `match_index` for server, will update `next_index` if possible - pub(super) fn update_match_index(&self, id: ServerId, index: LogIndex) { - let opt = self.map_status_with_id_mut(id, |status| { - if status.match_index >= index { - return; - } - status.match_index = index; - status.next_index = index + 1; - debug!("follower {id}'s match_index updated to {index}"); - }); - if opt.is_none() { - warn!("follower {} is not found, it maybe has been removed", id); - }; - } - - /// Create a `Iterator` for all statuses - pub(super) fn map_status(&self, f: F) -> impl Iterator - where - F: FnMut((&u64, &NodeStatus)) -> R, - { - 
self.statuses - .map_read(|status| status.iter().map(f).collect::>().into_iter()) - } - /// Get transferee pub(super) fn get_transferee(&self) -> Option { let val = self.leader_transferee.load(Ordering::Acquire); diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index db502aa3e..f2bfbcfee 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -38,14 +38,9 @@ impl RawCurp { role_change: TestRoleChange, task_manager: Arc, ) -> Self { - let peer_ids: Vec<_> = (1..n).collect(); + let _peer_ids: Vec<_> = (1..n).collect(); let cmd_board = Arc::new(RwLock::new(CommandBoard::new())); let lease_manager = Arc::new(RwLock::new(LeaseManager::new())); - let sync_events = peer_ids - .clone() - .into_iter() - .map(|id| (id, Arc::new(Event::new()))) - .collect(); let curp_config = CurpConfigBuilder::default() .log_entries_cap(10) .build() @@ -72,7 +67,6 @@ impl RawCurp { .cmd_board(cmd_board) .lease_manager(lease_manager) .cfg(Arc::new(curp_config)) - .sync_events(sync_events) .role_change(role_change) .task_manager(task_manager) .curp_storage(curp_storage) @@ -231,7 +225,7 @@ fn heartbeat_will_calibrate_next_index() { let st_r = curp.st.read(); assert_eq!(st_r.term, 1); - assert_eq!(curp.lst.get_next_index(s1_id), Some(1)); + assert_eq!(curp.ctx.node_states.get_next_index(s1_id), Some(1)); } #[traced_test] @@ -658,8 +652,8 @@ fn is_synced_should_return_true_when_followers_caught_up_with_leader() { assert!(!curp.is_synced(s1_id)); assert!(!curp.is_synced(s2_id)); - curp.lst.update_match_index(s1_id, 3); - curp.lst.update_match_index(s2_id, 3); + curp.ctx.node_states.update_match_index(s1_id, 3); + curp.ctx.node_states.update_match_index(s2_id, 3); assert!(curp.is_synced(s1_id)); assert!(curp.is_synced(s2_id)); } From 72507861f8cefef1f4015cc6b309e091d8034614 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sat, 14 Sep 2024 10:49:54 +0800 Subject: 
[PATCH 176/322] refactor: fix xline servers Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> refactor: maintenance server learner status Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> refactor: fix lease_server leader addrs Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> refactor: fix lock server watch Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/mod.rs | 11 ++++++----- crates/xline/src/server/lease_server.rs | 16 ++++++++++++---- crates/xline/src/server/maintenance.rs | 3 +-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index cf66890f9..ef9643d17 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1161,6 +1161,12 @@ impl RawCurp { self.log.read().commit_index } + /// Retruns `true` if the current node is a learner + #[inline] + pub fn is_learner(&self) -> bool { + !self.ms.read().is_member() + } + #[allow(clippy::unused_self)] #[cfg(test)] /// Get cluster id by it's name @@ -1174,11 +1180,6 @@ impl RawCurp { self.ms.read().node_id() } - /// Retruns `true` if the current node is a learner - pub(super) fn is_learner(&self) -> bool { - !self.ms.read().is_member() - } - /// Get self's node id pub(super) fn node_id(&self) -> u64 { self.ms.read().node_id() diff --git a/crates/xline/src/server/lease_server.rs b/crates/xline/src/server/lease_server.rs index 1fab5aad2..24549b1f9 100644 --- a/crates/xline/src/server/lease_server.rs +++ b/crates/xline/src/server/lease_server.rs @@ -307,8 +307,12 @@ impl Lease for LeaseServer { // a follower when it lost the election. Therefore we need to double check here. // We can directly invoke leader_keep_alive when a candidate becomes a leader. 
if !self.lease_storage.is_primary() { - // FIXME: get leader address - let leader_addrs = vec![]; + let cluster = self.client.fetch_cluster(true).await?; + let leader_addrs: Vec<_> = cluster + .nodes + .into_iter() + .filter_map(|node| (node.node_id == cluster.leader_id).then_some(node.addr)) + .collect(); break self .follower_keep_alive(request_stream, &leader_addrs) .await?; @@ -346,8 +350,12 @@ impl Lease for LeaseServer { }; return Ok(tonic::Response::new(res)); } - let _leader_id = self.client.fetch_leader_id(false).await?; - let leader_addrs = vec![]; // FIXME: get leader address + let cluster = self.client.fetch_cluster(true).await?; + let leader_addrs: Vec<_> = cluster + .nodes + .into_iter() + .filter_map(|node| (node.node_id == cluster.leader_id).then_some(node.addr)) + .collect(); if !self.lease_storage.is_primary() { let endpoints = build_endpoints(&leader_addrs, self.client_tls_config.as_ref())?; let channel = tonic::transport::Channel::balance_list(endpoints.into_iter()); diff --git a/crates/xline/src/server/maintenance.rs b/crates/xline/src/server/maintenance.rs index 9fb9c4ce7..305a8ed59 100644 --- a/crates/xline/src/server/maintenance.rs +++ b/crates/xline/src/server/maintenance.rs @@ -114,8 +114,7 @@ impl Maintenance for MaintenanceServer { &self, _request: tonic::Request, ) -> Result, tonic::Status> { - // FIXME: get learner status - let is_learner = false; + let is_learner = self.raw_curp.is_learner(); let (leader, term, _) = self.raw_curp.leader(); let commit_index = self.raw_curp.commit_index(); let size = self.db.file_size().map_err(|e| { From f528c97fc1fcfb54aef43f74198d8f6df8068742 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sat, 14 Sep 2024 18:10:26 +0800 Subject: [PATCH 177/322] 2 --- crates/curp/src/server/raw_curp/member_impl.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 
5f7dd1e54..4ae5493f3 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -22,8 +22,6 @@ use crate::server::StorageError; use super::RawCurp; use super::Role; -impl RawCurp {} - impl RawCurp { /// Generates new node ids /// TODO: makes sure that the ids are unique From 92904708a1affcf1f01d1cb4e1e8ecef631ebc1b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sat, 14 Sep 2024 18:17:41 +0800 Subject: [PATCH 178/322] refactor: remove unused code in members.rs Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/members.rs | 383 ------------------------------------- 1 file changed, 383 deletions(-) diff --git a/crates/curp/src/members.rs b/crates/curp/src/members.rs index 2a15c966c..b083ad972 100644 --- a/crates/curp/src/members.rs +++ b/crates/curp/src/members.rs @@ -1,385 +1,2 @@ -use std::{ - collections::{hash_map::DefaultHasher, HashMap}, - hash::Hasher, - sync::{ - atomic::{AtomicU64, Ordering}, - Arc, - }, -}; - -use dashmap::{mapref::one::Ref, DashMap}; -use itertools::Itertools; -#[cfg(not(madsim))] -#[cfg(madsim)] -use utils::ClientTlsConfig; - -use crate::rpc::Member; - /// Server Id pub type ServerId = u64; - -/// Cluster member -impl Member { - /// Create a new `Member` - #[inline] - pub fn new( - id: ServerId, - name: impl Into, - peer_urls: impl Into>, - client_urls: impl Into>, - is_learner: bool, - ) -> Self { - Self { - id, - name: name.into(), - peer_urls: peer_urls.into(), - client_urls: client_urls.into(), - is_learner, - } - } - - /// Get member id - #[must_use] - #[inline] - pub fn id(&self) -> ServerId { - self.id - } - - /// Get member name - #[must_use] - #[inline] - pub fn name(&self) -> &str { - &self.name - } - - /// Get member addresses - #[must_use] - #[inline] - pub fn peer_urls(&self) -> &[String] { - self.peer_urls.as_slice() - } - - /// Is learner or not - #[must_use] - #[inline] - pub fn is_learner(&self) -> 
bool { - self.is_learner - } -} - -/// cluster members information -#[derive(Debug, Clone)] -pub struct ClusterInfo { - /// cluster id - cluster_id: u64, - /// current server id - member_id: ServerId, - /// all members information - members: DashMap, - /// cluster version - cluster_version: Arc, -} - -impl ClusterInfo { - /// Construct a new `ClusterInfo` - #[inline] - #[must_use] - pub fn new(cluster_id: u64, member_id: u64, members: Vec) -> Self { - Self { - cluster_id, - member_id, - members: members.into_iter().map(|m| (m.id, m)).collect(), - cluster_version: Arc::new(AtomicU64::new(0)), - } - } - - /// Construct a new `ClusterInfo` from members map - /// - /// # Panics - /// - /// panic if `all_members` is empty - #[inline] - #[must_use] - pub fn from_members_map( - all_members_peer_urls: HashMap>, - self_client_urls: impl Into>, - self_name: &str, - ) -> Self { - let mut member_id = 0; - let self_client_urls = self_client_urls.into(); - let members = DashMap::new(); - for (name, peer_urls) in all_members_peer_urls { - let id = Self::calculate_member_id(peer_urls.clone(), "", None); - let mut member = Member::new(id, name.clone(), peer_urls, [], false); - if name == self_name { - member_id = id; - member.client_urls = self_client_urls.clone(); - } - let _ig = members.insert(id, member); - } - debug_assert!(member_id != 0, "self_id should not be 0"); - let mut cluster_info = Self { - cluster_id: 0, - member_id, - members, - cluster_version: Arc::new(AtomicU64::new(0)), - }; - cluster_info.gen_cluster_id(); - cluster_info - } - - /// Get all members - #[must_use] - #[inline] - pub fn all_members(&self) -> HashMap { - self.members - .iter() - .map(|t| (t.id, t.value().clone())) - .collect() - } - - /// Get all members vec - #[must_use] - #[inline] - pub fn all_members_vec(&self) -> Vec { - self.members.iter().map(|t| t.value().clone()).collect() - } - - /// Insert a member - #[inline] - #[must_use] - pub fn insert(&self, member: Member) -> Option { - 
self.members.insert(member.id, member) - } - - /// Remove a member - #[inline] - #[must_use] - pub fn remove(&self, id: &ServerId) -> Option { - self.members.remove(id).map(|(_id, m)| m) - } - - /// Get a member - #[inline] - #[must_use] - pub fn get(&self, id: &ServerId) -> Option> { - self.members.get(id) - } - - /// Update a member and return old addrs - #[inline] - pub fn update(&self, id: &ServerId, addrs: impl Into>) -> Vec { - let mut addrs = addrs.into(); - let mut member = self - .members - .get_mut(id) - .unwrap_or_else(|| unreachable!("member {} not found", id)); - std::mem::swap(&mut addrs, &mut member.peer_urls); - addrs - } - - /// Get server peer urls via server id - #[must_use] - #[inline] - pub fn peer_urls(&self, id: ServerId) -> Option> { - self.members.get(&id).map(|t| t.peer_urls.clone()) - } - - /// Get server client urls via server id - #[must_use] - #[inline] - pub fn client_urls(&self, id: ServerId) -> Option> { - self.members.get(&id).map(|t| t.client_urls.clone()) - } - - /// Get the current member - /// - /// # Panics - /// - /// panic if self member id is not in members - #[allow(clippy::unwrap_used)] // self member id must be in members - #[must_use] - #[inline] - pub fn self_member(&self) -> Ref<'_, u64, Member> { - self.members.get(&self.member_id).unwrap() - } - - /// Get the current server peer urls - #[must_use] - #[inline] - pub fn self_peer_urls(&self) -> Vec { - self.self_member().peer_urls.clone() - } - - /// Get the current server client addrs - #[must_use] - #[inline] - pub fn self_client_urls(&self) -> Vec { - self.self_member().client_urls.clone() - } - - /// Get the current server name - #[must_use] - #[inline] - pub fn self_name(&self) -> String { - self.self_member().name.clone() - } - - /// Get peers ids - #[must_use] - #[inline] - pub fn peers_ids(&self) -> Vec { - self.members - .iter() - .filter(|t| t.id != self.member_id) - .map(|t| t.id) - .collect() - } - - /// Get all ids - #[must_use] - #[inline] - pub fn 
all_ids(&self) -> Vec { - self.members.iter().map(|t| t.id).collect() - } - - /// Calculate the member id - #[inline] - #[must_use] - pub fn calculate_member_id( - mut addrs: Vec, - cluster_name: &str, - timestamp: Option, - ) -> ServerId { - let mut hasher = DefaultHasher::new(); - // to make sure same addrs but different order will get same id - addrs.sort(); - for addr in addrs { - hasher.write(addr.as_bytes()); - } - hasher.write(cluster_name.as_bytes()); - if let Some(ts) = timestamp { - hasher.write_u64(ts); - } - hasher.finish() - } - - /// Calculate the cluster id - fn gen_cluster_id(&mut self) { - let mut hasher = DefaultHasher::new(); - for id in self.members.iter().map(|t| t.id).sorted() { - hasher.write_u64(id); - } - self.cluster_id = hasher.finish(); - } - - /// Get member id - #[must_use] - #[inline] - pub fn self_id(&self) -> ServerId { - self.member_id - } - - /// Get cluster id - #[must_use] - #[inline] - pub fn cluster_id(&self) -> u64 { - self.cluster_id - } - - /// Get cluster version - #[must_use] - #[inline] - pub fn cluster_version(&self) -> u64 { - self.cluster_version.load(Ordering::Relaxed) - } - - /// Get peers - #[must_use] - #[inline] - pub fn peers_addrs(&self) -> HashMap> { - self.members - .iter() - .filter(|t| t.id != self.member_id) - .map(|t| (t.id, t.peer_urls.clone())) - .collect() - } - - /// Get all members - #[must_use] - #[inline] - pub fn all_members_peer_urls(&self) -> HashMap> { - self.members - .iter() - .map(|t| (t.id, t.peer_urls.clone())) - .collect() - } - - /// Get length of peers - #[must_use] - #[inline] - pub fn voters_len(&self) -> usize { - self.members.iter().filter(|t| !t.is_learner).count() - } - - /// Get id by name - #[must_use] - #[inline] - #[cfg(test)] - pub fn get_id_by_name(&self, name: &str) -> Option { - self.members - .iter() - .find_map(|m| (m.name == name).then_some(m.id)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_calculate_id() { - let all_members = 
HashMap::from([ - ("S1".to_owned(), vec!["S1".to_owned()]), - ("S2".to_owned(), vec!["S2".to_owned()]), - ("S3".to_owned(), vec!["S3".to_owned()]), - ]); - - let node1 = ClusterInfo::from_members_map(all_members.clone(), [], "S1"); - let node2 = ClusterInfo::from_members_map(all_members.clone(), [], "S2"); - let node3 = ClusterInfo::from_members_map(all_members, [], "S3"); - - assert_ne!(node1.self_id(), node2.self_id()); - assert_ne!(node1.self_id(), node3.self_id()); - assert_ne!(node3.self_id(), node2.self_id()); - - assert_eq!(node1.cluster_id(), node2.cluster_id()); - assert_eq!(node3.cluster_id(), node2.cluster_id()); - } - - #[test] - fn test_get_peers() { - let all_members = HashMap::from([ - ("S1".to_owned(), vec!["S1".to_owned()]), - ("S2".to_owned(), vec!["S2".to_owned()]), - ("S3".to_owned(), vec!["S3".to_owned()]), - ]); - - let node1 = ClusterInfo::from_members_map(all_members, [], "S1"); - let peers = node1.peers_addrs(); - let node1_id = node1.self_id(); - let node1_url = node1.self_peer_urls(); - assert!(!peers.contains_key(&node1_id)); - assert_eq!(peers.len(), 2); - assert_eq!(node1.voters_len(), peers.len() + 1); - - let peer_urls = peers.values().collect::>(); - - let peer_ids = node1.peers_ids(); - - assert_eq!(peer_ids.len(), peer_urls.len()); - - assert!(peer_urls.iter().find(|url| ***url == node1_url).is_none()); - assert!(peer_ids.iter().find(|id| **id == node1_id).is_none()); - } -} From 8b62aa8f311dab85d32733ce712b4e60835797f8 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 18 Sep 2024 08:55:00 +0800 Subject: [PATCH 179/322] chore: fix curp group Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/tests/it/common/curp_group.rs | 46 +++++++++-------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index d39d29e2c..2bb4ee5ee 100644 --- 
a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -15,7 +15,7 @@ use curp::{ client::{ClientApi, ClientBuilder}, error::ServerError, member::MembershipInfo, - members::{ClusterInfo, ServerId}, + members::ServerId, rpc::{InnerProtocolServer, Member, ProtocolServer}, server::{ conflict::test_pools::{TestSpecPool, TestUncomPool}, @@ -116,28 +116,23 @@ impl CurpGroup { assert!(n_nodes >= 3, "the number of nodes must >= 3"); let mut listeners = Self::gen_listeners(configs.keys()).await; let all_members_addrs = Self::listeners_to_all_members_addrs(&listeners); + let init_members: BTreeMap<_, _> = all_members_addrs + .into_iter() + .enumerate() + .map(|(id, (_, addrs))| (id as u64, addrs[0].clone())) + .collect(); let mut nodes = HashMap::new(); let client_tls_config = None; let server_tls_config = None; for (node_id, (name, (config, xline_storage_config))) in configs.into_iter().enumerate() { + let node_id = node_id as u64; let task_manager = Arc::new(TaskManager::new()); let snapshot_allocator = Self::get_snapshot_allocator_from_cfg(&config); - let cluster_info = Arc::new(ClusterInfo::from_members_map( - all_members_addrs.clone(), - [], - &name, - )); - let init_members = all_members_addrs - .values() - .map(|addrs| addrs[0].clone()) - .enumerate() - .map(|(id, addr)| (id as u64, addr)) - .collect(); - let membership_info = MembershipInfo::new(node_id as u64, init_members); + + let self_addr = init_members.get(&node_id).unwrap().clone(); + let membership_info = MembershipInfo::new(node_id, init_members.clone()); let listener = listeners.remove(&name).unwrap(); - let id = cluster_info.self_id(); - let addr = cluster_info.self_peer_urls().pop().unwrap(); let (exe_tx, exe_rx) = mpsc::unbounded_channel(); let (as_tx, as_rx) = mpsc::unbounded_channel(); @@ -168,8 +163,8 @@ impl CurpGroup { let ig = Self::run(server, listener, n).await; }); let curp_node = CurpNode { - id, - addr, + id: node_id, + addr: self_addr, exe_rx, as_rx, 
role_change_arc, @@ -243,12 +238,12 @@ impl CurpGroup { &mut self, listener: TcpListener, name: String, - cluster_info: Arc, + membership_info: MembershipInfo, ) { self.run_node_with_config( listener, name, - cluster_info, + membership_info, Arc::new(CurpConfig::default()), EngineConfig::default(), ) @@ -259,7 +254,7 @@ impl CurpGroup { &mut self, listener: TcpListener, name: String, - cluster_info: Arc, + membership_info: MembershipInfo, config: Arc, xline_storage_config: EngineConfig, ) { @@ -276,19 +271,12 @@ impl CurpGroup { xline_storage_config, )); - let id = cluster_info.self_id(); + let id = membership_info.node_id; let role_change_cb = TestRoleChange::default(); let role_change_arc = role_change_cb.get_inner_arc(); let curp_storage = Arc::new(DB::open(&config.engine_cfg).unwrap()); - // TODO: remove cluster info and build the membership info from start - let init_members: BTreeMap<_, _> = cluster_info - .all_members_peer_urls() - .values() - .map(|addrs| addrs[0].clone()) - .enumerate() - .map(|(id, addr)| (id as u64, addr)) - .collect(); + let init_members = membership_info.init_members; let node_id = init_members.len(); let membership_info = MembershipInfo::new(node_id as u64, init_members); let server = Arc::new(Rpc::new( From 6dfc21d4baec6a76d9eda09c792df77d40cd853c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 18 Sep 2024 16:09:42 +0800 Subject: [PATCH 180/322] refactor: add `NodeMetadata` to membership config Previously, Xline required a single address string in the membership config. This commit introduces a `NodeMetadata` type to store all node metadata, including: - name: The node's name. - peer_addrs: URLs for peer-to-peer communication. - client_addrs: URLs for client communication. 
Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/build.rs | 4 + crates/curp/proto/common | 2 +- crates/curp/src/client/connect.rs | 10 +- crates/curp/src/client/fetch.rs | 7 +- crates/curp/src/client/keep_alive.rs | 19 ++- crates/curp/src/client/mod.rs | 13 +- crates/curp/src/client/retry.rs | 6 +- crates/curp/src/client/tests.rs | 16 +- crates/curp/src/client/unary/mod.rs | 10 +- crates/curp/src/member.rs | 17 ++- crates/curp/src/rpc/mod.rs | 62 +++++++- .../curp/src/server/curp_node/member_impl.rs | 11 +- crates/curp/src/server/curp_node/mod.rs | 7 +- .../curp/src/server/raw_curp/member_impl.rs | 2 +- crates/curp/src/server/raw_curp/mod.rs | 9 +- crates/curp/tests/it/common/curp_group.rs | 13 +- crates/utils/src/config.rs | 141 +++++++++++++++--- crates/utils/src/parser.rs | 33 ++-- crates/xline-client/examples/member.rs | 8 +- crates/xline-client/src/clients/member.rs | 51 ++++++- crates/xline-client/src/clients/mod.rs | 2 +- crates/xline-client/tests/it/member.rs | 6 +- crates/xline-test-utils/src/lib.rs | 53 ++++--- crates/xline/src/server/lease_server.rs | 25 ++-- crates/xline/src/server/xline_server.rs | 22 ++- crates/xline/src/utils/args.rs | 13 +- 26 files changed, 415 insertions(+), 147 deletions(-) diff --git a/crates/curp/build.rs b/crates/curp/build.rs index 96985ca5a..b1d150257 100644 --- a/crates/curp/build.rs +++ b/crates/curp/build.rs @@ -4,6 +4,10 @@ fn main() { "ProposeConfChangeRequest.ConfChange", "#[derive(serde::Deserialize, serde::Serialize)]", ) + .type_attribute( + "NodeMetadata", + "#[derive(serde::Deserialize, serde::Serialize, Eq, Hash)]", + ) .compile( &[ "./proto/common/src/curp-command.proto", diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 4cb05f81a..c38721ded 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 4cb05f81af407874fd31a322f3a09bd8a5118509 +Subproject commit c38721deddae771fb13b558b603182b40528f68d diff --git 
a/crates/curp/src/client/connect.rs b/crates/curp/src/client/connect.rs index 9fcfb7741..a54e16388 100644 --- a/crates/curp/src/client/connect.rs +++ b/crates/curp/src/client/connect.rs @@ -3,7 +3,7 @@ use curp_external_api::cmd::Command; use crate::{ members::ServerId, - rpc::{FetchMembershipResponse, ReadState}, + rpc::{FetchMembershipResponse, NodeMetadata, ReadState}, }; use super::retry::Context; @@ -60,7 +60,7 @@ pub trait ClientApi { } /// Add some learners to the cluster. - async fn add_learner(&self, addrs: Vec) -> Result, Self::Error>; + async fn add_learner(&self, nodes: Vec) -> Result, Self::Error>; /// Remove some learners from the cluster. async fn remove_learner(&self, ids: Vec) -> Result<(), Self::Error>; @@ -105,7 +105,11 @@ pub(crate) trait RepeatableClientApi { ) -> Result; /// Add some learners to the cluster. - async fn add_learner(&self, addrs: Vec, ctx: Context) -> Result, Self::Error>; + async fn add_learner( + &self, + nodes: Vec, + ctx: Context, + ) -> Result, Self::Error>; /// Remove some learners from the cluster. 
async fn remove_learner(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error>; diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index a5f43e259..2f07fb801 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -151,7 +151,10 @@ mod test { use crate::{ client::{cluster_state::ForEachServer, config::Config, tests::init_mocked_connects}, - rpc::{self, connect::ConnectApi, CurpError, FetchMembershipResponse, Member, Node}, + rpc::{ + self, connect::ConnectApi, CurpError, FetchMembershipResponse, Member, Node, + NodeMetadata, + }, }; use super::Fetch; @@ -183,7 +186,7 @@ mod test { .into_iter() .map(|node_id| Node { node_id, - addr: String::new(), + meta: Some(NodeMetadata::default()), }) .collect(); let qs = rpc::QuorumSet { set: members }; diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 31b62b470..c7e597aea 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -158,10 +158,11 @@ mod tests { connect::{ConnectApi, MockConnectApi}, AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, CurpError, FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, - FetchReadStateResponse, Member, MoveLeaderRequest, MoveLeaderResponse, OpResponse, - ProposeId, ProposeRequest, ProposeResponse, ReadIndexResponse, RecordRequest, - RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, RemoveMemberRequest, - RemoveMemberResponse, ResponseOp, ShutdownRequest, ShutdownResponse, SyncedResponse, + FetchReadStateResponse, Member, MoveLeaderRequest, MoveLeaderResponse, NodeMetadata, + OpResponse, ProposeId, ProposeRequest, ProposeResponse, ReadIndexResponse, + RecordRequest, RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, + RemoveMemberRequest, RemoveMemberResponse, ResponseOp, ShutdownRequest, + ShutdownResponse, SyncedResponse, }, }; @@ -331,7 +332,15 @@ mod tests { term: u64, ) -> 
KeepAliveHandle { let members = (0..5).collect::>(); - let nodes = members.iter().map(|id| (*id, format!("{id}"))).collect(); + let nodes = members + .iter() + .map(|id| { + ( + *id, + NodeMetadata::new(format!("{id}"), vec!["addr"], vec!["addr"]), + ) + }) + .collect(); let state = ClusterState::Ready(ClusterStateReady::new( leader, term, diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 0f0d0429a..42e15ff9f 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -64,7 +64,7 @@ use crate::{ rpc::{ self, connect::{BypassedConnect, ConnectApi}, - FetchMembershipResponse, Node, ProposeId, Protocol, + FetchMembershipResponse, ProposeId, Protocol, }, server::StreamingProtocol, tracker::Tracker, @@ -223,15 +223,20 @@ impl ClientBuilder { &self, bypassed: Option<(u64, Arc)>, ) -> impl ConnectToCluster { - // TODO: distinguish peer urls / client urls let tls_config = self.tls_config.clone(); + let is_raw_curp = self.is_raw_curp; move |resp: &FetchMembershipResponse| -> HashMap> { resp.nodes .clone() .into_iter() .map(|node| { - let Node { node_id, addr } = node; - let connect = rpc::connect(node_id, vec![addr], tls_config.clone()); + let (node_id, meta) = node.into_parts(); + let addrs = if is_raw_curp { + meta.into_peer_urls() + } else { + meta.into_client_urls() + }; + let connect = rpc::connect(node_id, addrs, tls_config.clone()); (node_id, connect) }) .chain(bypassed.clone()) diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 962887915..5783a3805 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -19,7 +19,7 @@ use super::{ }; use crate::{ members::ServerId, - rpc::{CurpError, ReadState, Redirect, ProposeId, FetchMembershipResponse}, tracker::Tracker, + rpc::{CurpError, ReadState, Redirect, ProposeId, FetchMembershipResponse, NodeMetadata}, tracker::Tracker, }; /// Backoff config @@ -434,8 +434,8 @@ where } /// Add some learners to the cluster. 
- async fn add_learner(&self, addrs: Vec) -> Result, Self::Error> { - self.retry::<_, _>(|client, ctx| client.add_learner(addrs.clone(), ctx)) + async fn add_learner(&self, nodes: Vec) -> Result, Self::Error> { + self.retry::<_, _>(|client, ctx| client.add_learner(nodes.clone(), ctx)) .await } diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 3bcbaf5ea..7f24a49d7 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -26,8 +26,8 @@ use crate::{ rpc::{ self, connect::{ConnectApi, MockConnectApi}, - CurpError, FetchMembershipResponse, Node, OpResponse, ProposeId, ProposeResponse, - ReadIndexResponse, RecordResponse, ResponseOp, SyncedResponse, + CurpError, FetchMembershipResponse, Node, NodeMetadata, OpResponse, ProposeId, + ProposeResponse, ReadIndexResponse, RecordResponse, ResponseOp, SyncedResponse, }, }; @@ -87,7 +87,15 @@ fn build_empty_response() -> OpResponse { fn build_default_membership() -> Membership { let members = (0..5).collect::>(); - let nodes = members.iter().map(|id| (*id, format!("{id}"))).collect(); + let nodes = members + .iter() + .map(|id| { + ( + *id, + NodeMetadata::new(format!("{id}"), vec!["addr"], vec!["addr"]), + ) + }) + .collect(); Membership::new(vec![members], nodes) } @@ -104,7 +112,7 @@ fn build_membership_resp( .into_iter() .map(|node_id| Node { node_id, - addr: String::new(), + meta: Some(NodeMetadata::default()), }) .collect(); let qs = rpc::QuorumSet { set: members }; diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 9f20f1374..224396a24 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -14,7 +14,7 @@ use super::{ }; use crate::rpc::{ AddLearnerRequest, AddMemberRequest, CurpError, FetchReadStateRequest, MoveLeaderRequest, - ReadState, RemoveLearnerRequest, RemoveMemberRequest, ShutdownRequest, + NodeMetadata, ReadState, RemoveLearnerRequest, RemoveMemberRequest, 
ShutdownRequest, }; /// The unary client @@ -110,8 +110,12 @@ impl RepeatableClientApi for Unary { } /// Add some learners to the cluster. - async fn add_learner(&self, addrs: Vec, ctx: Context) -> Result, Self::Error> { - let req = AddLearnerRequest { node_addrs: addrs }; + async fn add_learner( + &self, + nodes: Vec, + ctx: Context, + ) -> Result, Self::Error> { + let req = AddLearnerRequest { nodes }; let timeout = self.config.wait_synced_timeout(); let resp = ctx .cluster_state() diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 3d5ce7fc7..29d1b6c14 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -13,6 +13,7 @@ use serde::Serialize; use crate::quorum::Joint; use crate::quorum::QuorumSet; +use crate::rpc::NodeMetadata; /// The membership info, used to build the initial states #[derive(Debug, Clone)] @@ -21,14 +22,14 @@ pub struct MembershipInfo { /// The id of current node pub node_id: u64, /// The initial cluster members - pub init_members: BTreeMap, + pub init_members: BTreeMap, } impl MembershipInfo { /// Creates a new `MembershipInfo` #[inline] #[must_use] - pub fn new(node_id: u64, init_members: BTreeMap) -> Self { + pub fn new(node_id: u64, init_members: BTreeMap) -> Self { Self { node_id, init_members, @@ -173,13 +174,13 @@ pub(crate) struct Membership { /// Member of the cluster pub(crate) members: Vec>, /// All Nodes, including members and learners - pub(crate) nodes: BTreeMap, + pub(crate) nodes: BTreeMap, } impl Membership { #[cfg(test)] /// Creates a new `Membership` - pub(crate) fn new(members: Vec>, nodes: BTreeMap) -> Self { + pub(crate) fn new(members: Vec>, nodes: BTreeMap) -> Self { Self { members, nodes } } @@ -191,11 +192,11 @@ impl Membership { Change::AddLearner(learners) => { let members = self.members.clone(); let mut nodes = self.nodes.clone(); - for (id, addr) in learners { + for (id, meta) in learners { match nodes.entry(id) { Entry::Occupied(_) => return vec![], Entry::Vacant(e) => { - 
let _ignore = e.insert(addr); + let _ignore = e.insert(meta); } } } @@ -291,7 +292,7 @@ impl Membership { } /// Gets the addresses of all members - pub(crate) fn members(&self) -> impl Iterator { + pub(crate) fn members(&self) -> impl Iterator { self.nodes.iter().filter_map(|(id, addr)| { self.members .iter() @@ -311,7 +312,7 @@ impl Membership { #[derive(Clone)] pub(crate) enum Change { /// Adds learners - AddLearner(Vec<(u64, String)>), + AddLearner(Vec<(u64, NodeMetadata)>), /// Removes learners RemoveLearner(Vec), /// Adds members diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index fa18d4f83..549b5c66a 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -40,6 +40,7 @@ pub use self::proto::{ MoveLeaderRequest, MoveLeaderResponse, Node, + NodeMetadata, OpResponse, OptionalU64, ProposeId as PbProposeId, @@ -803,7 +804,66 @@ impl FetchMembershipResponse { .into_iter() .map(|m| m.set.into_iter().collect()) .collect(), - nodes: nodes.into_iter().map(|n| (n.node_id, n.addr)).collect(), + nodes: nodes.into_iter().map(Node::into_parts).collect(), } } } + +impl Node { + /// Unwraps self + #[allow(clippy::unwrap_used, clippy::missing_panics_doc)] // convert rpc types + #[inline] + #[must_use] + pub fn into_parts(self) -> (u64, NodeMetadata) { + let Node { node_id, meta } = self; + (node_id, meta.unwrap()) + } +} + +impl NodeMetadata { + /// Creates a new `NodeMetadata` + #[inline] + #[must_use] + pub fn new(name: String, peer_urls: Vec, client_urls: Vec) -> Self { + Self { + name, + peer_urls, + client_urls, + } + } + + /// Returns the name of the learner node. + #[inline] + #[must_use] + pub fn name(&self) -> &str { + &self.name + } + + /// Returns a reference to the list of peer URLs. + #[inline] + #[must_use] + pub fn peer_urls(&self) -> &[String] { + &self.peer_urls + } + + /// Returns a reference to the list of client URLs. 
+ #[inline] + #[must_use] + pub fn client_urls(&self) -> &[String] { + &self.client_urls + } + + /// Converts the `self` instance into a vector of peer URLs. + #[inline] + #[must_use] + pub fn into_peer_urls(self) -> Vec { + self.peer_urls + } + + /// Converts the `self` instance into a vector of client URLs. + #[inline] + #[must_use] + pub fn into_client_urls(self) -> Vec { + self.client_urls + } +} diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index a7fdb1dc4..e7b6360a8 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -30,12 +30,11 @@ impl, RC: RoleChange> CurpNode { &self, request: AddLearnerRequest, ) -> Result { - let node_addrs = request.node_addrs; - let node_ids = self.curp.new_node_ids(node_addrs.len()); - self.update_and_wait(Change::AddLearner( - node_ids.clone().into_iter().zip(node_addrs).collect(), - )) - .await?; + let node_ids = self.curp.new_node_ids(request.nodes.len()); + let ids_with_meta = node_ids.clone().into_iter().zip(request.nodes).collect(); + + self.update_and_wait(Change::AddLearner(ids_with_meta)) + .await?; Ok(AddLearnerResponse { node_ids }) } diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 57f689a13..2c84dd8da 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -386,7 +386,10 @@ impl, RC: RoleChange> CurpNode { .collect(); let nodes = nodes .into_iter() - .map(|(node_id, addr)| Node { node_id, addr }) + .map(|(node_id, meta)| Node { + node_id, + meta: Some(meta), + }) .collect(); let leader_id = @@ -768,7 +771,7 @@ impl, RC: RoleChange> CurpNode { .init_members .clone() .into_iter() - .map(|(id, addr)| (id, vec![addr])) + .map(|(id, meta)| (id, meta.into_peer_urls())) .collect(); let member_connects = rpc::inner_connects(peer_addrs, client_tls_config.as_ref()).collect(); let cmd_board = 
Arc::new(RwLock::new(CommandBoard::new())); diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 4ae5493f3..2f195cda3 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -123,7 +123,7 @@ impl RawCurp { let nodes = config .nodes .iter() - .map(|(id, addr)| (*id, vec![addr.clone()])) + .map(|(id, meta)| (*id, meta.peer_urls().to_vec())) .collect(); inner_connects(nodes, self.client_tls_config()).collect() diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index ef9643d17..50e7db88f 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1167,12 +1167,13 @@ impl RawCurp { !self.ms.read().is_member() } - #[allow(clippy::unused_self)] #[cfg(test)] /// Get cluster id by it's name - pub(super) fn get_id_by_name(&self, _name: impl AsRef) -> Option { - // FIXME: implement logic - None + pub(super) fn get_id_by_name(&self, name: impl AsRef) -> Option { + self.effective_membership() + .nodes + .into_iter() + .find_map(|(id, n)| (n.name() == name.as_ref()).then_some(id)) } /// Get self's id diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index 2bb4ee5ee..471ee65f3 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -16,7 +16,7 @@ use curp::{ error::ServerError, member::MembershipInfo, members::ServerId, - rpc::{InnerProtocolServer, Member, ProtocolServer}, + rpc::{InnerProtocolServer, Member, NodeMetadata, ProtocolServer}, server::{ conflict::test_pools::{TestSpecPool, TestUncomPool}, Rpc, DB, @@ -119,7 +119,12 @@ impl CurpGroup { let init_members: BTreeMap<_, _> = all_members_addrs .into_iter() .enumerate() - .map(|(id, (_, addrs))| (id as u64, addrs[0].clone())) + .map(|(id, (name, addrs))| { + ( + id as u64, + NodeMetadata::new(name, addrs.clone(), 
addrs.clone()), + ) + }) .collect(); let mut nodes = HashMap::new(); @@ -130,7 +135,7 @@ impl CurpGroup { let task_manager = Arc::new(TaskManager::new()); let snapshot_allocator = Self::get_snapshot_allocator_from_cfg(&config); - let self_addr = init_members.get(&node_id).unwrap().clone(); + let meta = init_members.get(&node_id).unwrap().clone(); let membership_info = MembershipInfo::new(node_id, init_members.clone()); let listener = listeners.remove(&name).unwrap(); @@ -164,7 +169,7 @@ impl CurpGroup { }); let curp_node = CurpNode { id: node_id, - addr: self_addr, + addr: meta.peer_urls()[0].clone(), exe_rx, as_rx, role_change_arc, diff --git a/crates/utils/src/config.rs b/crates/utils/src/config.rs index def0994cd..8940aa3f6 100644 --- a/crates/utils/src/config.rs +++ b/crates/utils/src/config.rs @@ -1,8 +1,4 @@ -use std::{ - collections::{BTreeMap, HashMap}, - path::PathBuf, - time::Duration, -}; +use std::{collections::HashMap, path::PathBuf, time::Duration}; use derive_builder::Builder; use getset::Getters; @@ -125,12 +121,40 @@ pub struct ClusterConfig { initial_cluster_state: InitialClusterState, /// Initial cluster members #[getset(get = "pub")] - initial_membership_info: BTreeMap, + initial_membership_info: HashMap, /// Node id #[getset(get = "pub")] node_id: u64, } +/// Inital node metadata config +#[allow(clippy::module_name_repetitions)] +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Getters)] +pub struct NodeMetaConfig { + /// The id of the node + #[getset(get = "pub")] + id: u64, + /// URLs of the peers in the cluster + #[getset(get = "pub")] + peer_urls: Vec, + /// URLs of the clients connected to the cluster + #[getset(get = "pub")] + client_urls: Vec, +} + +impl NodeMetaConfig { + /// Creates a new `NodeMetaConfig`. 
+ #[inline] + #[must_use] + pub fn new(id: u64, peer_urls: Vec, client_urls: Vec) -> Self { + Self { + id, + peer_urls, + client_urls, + } + } +} + impl Default for ClusterConfig { #[inline] fn default() -> Self { @@ -149,7 +173,14 @@ impl Default for ClusterConfig { client_config: ClientConfig::default(), server_timeout: ServerTimeout::default(), initial_cluster_state: InitialClusterState::default(), - initial_membership_info: BTreeMap::from([(0, "http://127.0.0.1:2379".to_owned())]), + initial_membership_info: HashMap::from([( + "default".to_owned(), + NodeMetaConfig::new( + 0, + vec!["http://127.0.0.1:2380".to_owned()], + vec!["http://127.0.0.1:2379".to_owned()], + ), + )]), node_id: 0, } } @@ -201,7 +232,7 @@ impl ClusterConfig { client_config: ClientConfig, server_timeout: ServerTimeout, initial_cluster_state: InitialClusterState, - initial_membership_info: BTreeMap, + initial_membership_info: HashMap, node_id: u64, ) -> Self { Self { @@ -1241,10 +1272,20 @@ mod tests { node2 = ['127.0.0.1:2380'] node3 = ['127.0.0.1:2381'] - [cluster.initial_membership_info] - 1 = '127.0.0.1:2379' - 2 = '127.0.0.1:2380' - 3 = '127.0.0.1:2381' + [cluster.initial_membership_info.node1] + id = 1 + peer_urls = ['127.0.0.1:2380'] + client_urls = ['127.0.0.1:2379'] + + [cluster.initial_membership_info.node2] + id = 2 + peer_urls = ['127.0.0.1:2480'] + client_urls = ['127.0.0.1:2479'] + + [cluster.initial_membership_info.node3] + id = 3 + peer_urls = ['127.0.0.1:2580'] + client_urls = ['127.0.0.1:2579'] [cluster.curp_config] heartbeat_interval = '200ms' @@ -1343,10 +1384,31 @@ mod tests { client_config, server_timeout, InitialClusterState::New, - BTreeMap::from([ - (1, "127.0.0.1:2379".to_owned()), - (2, "127.0.0.1:2380".to_owned()), - (3, "127.0.0.1:2381".to_owned()), + HashMap::from([ + ( + "node1".to_owned(), + NodeMetaConfig::new( + 1, + vec!["127.0.0.1:2380".to_owned()], + vec!["127.0.0.1:2379".to_owned()] + ) + ), + ( + "node2".to_owned(), + NodeMetaConfig::new( + 2, + 
vec!["127.0.0.1:2480".to_owned()], + vec!["127.0.0.1:2479".to_owned()] + ) + ), + ( + "node3".to_owned(), + NodeMetaConfig::new( + 3, + vec!["127.0.0.1:2580".to_owned()], + vec!["127.0.0.1:2579".to_owned()] + ) + ), ]), 1, ) @@ -1434,10 +1496,20 @@ mod tests { node2 = ['127.0.0.1:2380'] node3 = ['127.0.0.1:2381'] - [cluster.initial_membership_info] - 1 = '127.0.0.1:2379' - 2 = '127.0.0.1:2380' - 3 = '127.0.0.1:2381' + [cluster.initial_membership_info.node1] + id = 1 + peer_urls = ['127.0.0.1:2380'] + client_urls = ['127.0.0.1:2379'] + + [cluster.initial_membership_info.node2] + id = 2 + peer_urls = ['127.0.0.1:2480'] + client_urls = ['127.0.0.1:2479'] + + [cluster.initial_membership_info.node3] + id = 3 + peer_urls = ['127.0.0.1:2580'] + client_urls = ['127.0.0.1:2579'] [cluster.storage] @@ -1480,10 +1552,31 @@ mod tests { ClientConfig::default(), ServerTimeout::default(), InitialClusterState::default(), - BTreeMap::from([ - (1, "127.0.0.1:2379".to_owned()), - (2, "127.0.0.1:2380".to_owned()), - (3, "127.0.0.1:2381".to_owned()), + HashMap::from([ + ( + "node1".to_owned(), + NodeMetaConfig::new( + 1, + vec!["127.0.0.1:2380".to_owned()], + vec!["127.0.0.1:2379".to_owned()] + ) + ), + ( + "node2".to_owned(), + NodeMetaConfig::new( + 2, + vec!["127.0.0.1:2480".to_owned()], + vec!["127.0.0.1:2479".to_owned()] + ) + ), + ( + "node3".to_owned(), + NodeMetaConfig::new( + 3, + vec!["127.0.0.1:2580".to_owned()], + vec!["127.0.0.1:2579".to_owned()] + ) + ), ]), 1, ) diff --git a/crates/utils/src/parser.rs b/crates/utils/src/parser.rs index b05fbf268..e0047def1 100644 --- a/crates/utils/src/parser.rs +++ b/crates/utils/src/parser.rs @@ -9,7 +9,8 @@ use regex::Regex; use thiserror::Error; use crate::config::{ - ClusterRange, InitialClusterState, LevelConfig, MetricsPushProtocol, RotationConfig, + ClusterRange, InitialClusterState, LevelConfig, MetricsPushProtocol, NodeMetaConfig, + RotationConfig, }; /// seconds per minute @@ -80,20 +81,22 @@ pub fn parse_members(s: &str) -> 
Result>, ConfigPars /// /// Return error when pass wrong args #[inline] -pub fn parse_membership(s: &str) -> Result, ConfigParseError> { - // TODO: currently reuse `parse_members`. Rewrite this after the old membership change is - // removed. - let ms = parse_members(s)?; - ms.into_iter() - .map(|(k, v)| { - k.parse() - .ok() - .zip(v.into_iter().next()) - .ok_or(ConfigParseError::InvalidValue( - "parese membership error".to_owned(), - )) - }) - .collect::>() +#[allow(clippy::todo)] +pub fn parse_membership(_s: &str) -> Result, ConfigParseError> { + todo!() + //// TODO: currently reuse `parse_members`. Rewrite this after the old membership change is + //// removed. + //let ms = parse_members(s)?; + //ms.into_iter() + // .map(|(k, v)| { + // k.parse() + // .ok() + // .zip(v.into_iter().next()) + // .ok_or(ConfigParseError::InvalidValue( + // "parese membership error".to_owned(), + // )) + // }) + // .collect::>() } /// Parse `ClusterRange` from the given string diff --git a/crates/xline-client/examples/member.rs b/crates/xline-client/examples/member.rs index d2787cc53..ca348cd1d 100644 --- a/crates/xline-client/examples/member.rs +++ b/crates/xline-client/examples/member.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use xline_client::{Client, ClientOptions}; +use xline_client::{clients::Node, Client, ClientOptions}; #[tokio::main] async fn main() -> Result<()> { @@ -10,9 +10,9 @@ async fn main() -> Result<()> { .await? 
.member_client(); - let ids = client - .add_learner(vec!["10.0.0.4:2379".to_owned(), "10.0.0.5:2379".to_owned()]) - .await?; + let node1 = Node::new("n1", vec!["10.0.0.4:2380"], vec!["10.0.0.4.2379"]); + let node2 = Node::new("n2", vec!["10.0.0.5:2380"], vec!["10.0.0.5.2379"]); + let ids = client.add_learner(vec![node1, node2]).await?; println!("got node ids of new learners: {ids:?}"); diff --git a/crates/xline-client/src/clients/member.rs b/crates/xline-client/src/clients/member.rs index 173be4bef..7a69619c1 100644 --- a/crates/xline-client/src/clients/member.rs +++ b/crates/xline-client/src/clients/member.rs @@ -49,9 +49,9 @@ impl MemberClient { /// } /// ``` #[inline] - pub async fn add_learner(&self, addrs: Vec) -> Result> { + pub async fn add_learner(&self, nodes: Vec) -> Result> { self.curp_client - .add_learner(addrs) + .add_learner(nodes.into_iter().map(Into::into).collect()) .await .map_err(Into::into) } @@ -97,3 +97,50 @@ impl std::fmt::Debug for MemberClient { f.debug_struct("MemberClient").finish() } } + +/// Represents a node in the cluster with its associated metadata. +#[derive(Clone, Debug)] +#[non_exhaustive] +pub struct Node { + /// Name of the node. + pub name: String, + /// List of URLs used for peer-to-peer communication. + pub peer_urls: Vec, + /// List of URLs used for client communication. 
+ pub client_urls: Vec, +} + +impl Node { + /// Creates a new `Node` + #[inline] + #[must_use] + pub fn new(name: N, peer_urls: AS, client_urls: AS) -> Self + where + N: AsRef, + A: AsRef, + AS: IntoIterator, + { + Self { + name: name.as_ref().to_owned(), + peer_urls: peer_urls + .into_iter() + .map(|s| s.as_ref().to_owned()) + .collect(), + client_urls: client_urls + .into_iter() + .map(|s| s.as_ref().to_owned()) + .collect(), + } + } +} + +impl From for curp::rpc::NodeMetadata { + #[inline] + fn from(node: Node) -> Self { + curp::rpc::NodeMetadata { + name: node.name, + peer_urls: node.peer_urls, + client_urls: node.client_urls, + } + } +} diff --git a/crates/xline-client/src/clients/mod.rs b/crates/xline-client/src/clients/mod.rs index 634b257d6..9c7c24828 100644 --- a/crates/xline-client/src/clients/mod.rs +++ b/crates/xline-client/src/clients/mod.rs @@ -5,7 +5,7 @@ pub use kv::KvClient; pub use lease::LeaseClient; pub use lock::{LockClient, Session, Xutex}; pub use maintenance::MaintenanceClient; -pub use member::MemberClient; +pub use member::{MemberClient, Node}; pub use watch::WatchClient; /// Auth client. 
diff --git a/crates/xline-client/tests/it/member.rs b/crates/xline-client/tests/it/member.rs index fb570f9c9..b1c9c8f56 100644 --- a/crates/xline-client/tests/it/member.rs +++ b/crates/xline-client/tests/it/member.rs @@ -1,5 +1,5 @@ use test_macros::abort_on_panic; -use xline_client::error::Result; +use xline_client::{clients::Node, error::Result}; use super::common::get_cluster_client; @@ -9,8 +9,10 @@ async fn learner_add_and_remove_are_ok() -> Result<()> { let (_cluster, client) = get_cluster_client().await.unwrap(); let client = client.member_client(); + let node1 = Node::new("n1", vec!["10.0.0.4:2380"], vec!["10.0.0.4.2379"]); + let node2 = Node::new("n2", vec!["10.0.0.5:2380"], vec!["10.0.0.5.2379"]); let ids = client - .add_learner(vec!["10.0.0.4:2379".to_owned(), "10.0.0.5:2379".to_owned()]) + .add_learner(vec![node1, node2]) .await .expect("failed to add learners"); diff --git a/crates/xline-test-utils/src/lib.rs b/crates/xline-test-utils/src/lib.rs index 6c66a826e..36f1f2361 100644 --- a/crates/xline-test-utils/src/lib.rs +++ b/crates/xline-test-utils/src/lib.rs @@ -1,10 +1,4 @@ -use std::{ - collections::{BTreeMap, HashMap}, - env::temp_dir, - iter, - path::PathBuf, - sync::Arc, -}; +use std::{collections::HashMap, env::temp_dir, iter, path::PathBuf, sync::Arc}; use futures::future::join_all; use rand::{distributions::Alphanumeric, thread_rng, Rng}; @@ -17,7 +11,8 @@ use tokio::{ use tonic::transport::ClientTlsConfig; use utils::config::{ default_quota, AuthConfig, ClusterConfig, CompactConfig, EngineConfig, InitialClusterState, - LogConfig, MetricsConfig, StorageConfig, TlsConfig, TraceConfig, XlineServerConfig, + LogConfig, MetricsConfig, NodeMetaConfig, StorageConfig, TlsConfig, TraceConfig, + XlineServerConfig, }; use xline::server::XlineServer; use xline_client::types::{auth::PermissionType, range_end::RangeOption}; @@ -96,6 +91,19 @@ impl Cluster { let (xline_listener, curp_listener) = self.listeners.remove(0); let self_client_url = 
self.get_client_url(i); let self_peer_url = self.get_peer_url(i); + + let node_meta_config = self + .all_members_peer_urls + .clone() + .into_iter() + .zip(self.all_members_client_urls.clone()) + .enumerate() + .map(|(id, (peer_url, client_url))| { + let name = format!("server{id}"); + let config = NodeMetaConfig::new(id as u64, vec![peer_url], vec![client_url]); + (name, config) + }) + .collect(); let config = Self::merge_config( config, name, @@ -109,12 +117,7 @@ impl Cluster { .collect(), i == 0, InitialClusterState::New, - self.all_members_peer_urls - .clone() - .into_iter() - .enumerate() - .map(|(i, addr)| (i as u64, addr)) - .collect(), + node_meta_config, i as u64, ); @@ -176,6 +179,19 @@ impl Cluster { self.configs.push(base_config); let base_config = self.configs.last().unwrap(); + let node_meta_config = self + .all_members_peer_urls + .clone() + .into_iter() + .zip(self.all_members_client_urls.clone()) + .enumerate() + .map(|(id, (peer_url, client_url))| { + let name = format!("server{id}"); + let config = NodeMetaConfig::new(id as u64, vec![peer_url], vec![client_url]); + (name, config) + }) + .collect(); + let config = Self::merge_config( base_config, name, @@ -184,12 +200,7 @@ impl Cluster { peers, false, InitialClusterState::Existing, - self.all_members_peer_urls - .clone() - .into_iter() - .enumerate() - .map(|(i, addr)| (i as u64, addr)) - .collect(), + node_meta_config, idx as u64, ); @@ -294,7 +305,7 @@ impl Cluster { peers: HashMap>, is_leader: bool, initial_cluster_state: InitialClusterState, - initial_membership_info: BTreeMap, + initial_membership_info: HashMap, node_id: u64, ) -> XlineServerConfig { let old_cluster = base_config.cluster(); diff --git a/crates/xline/src/server/lease_server.rs b/crates/xline/src/server/lease_server.rs index 24549b1f9..2bfb815ea 100644 --- a/crates/xline/src/server/lease_server.rs +++ b/crates/xline/src/server/lease_server.rs @@ -308,13 +308,13 @@ impl Lease for LeaseServer { // We can directly invoke 
leader_keep_alive when a candidate becomes a leader. if !self.lease_storage.is_primary() { let cluster = self.client.fetch_cluster(true).await?; - let leader_addrs: Vec<_> = cluster - .nodes - .into_iter() - .filter_map(|node| (node.node_id == cluster.leader_id).then_some(node.addr)) - .collect(); + let Some(leader_meta) = cluster.nodes.into_iter().find_map(|node| { + (node.node_id == cluster.leader_id).then_some(node.into_parts().1) + }) else { + return Err(tonic::Status::internal("Leader not exist")); + }; break self - .follower_keep_alive(request_stream, &leader_addrs) + .follower_keep_alive(request_stream, leader_meta.peer_urls()) .await?; } }; @@ -351,13 +351,14 @@ impl Lease for LeaseServer { return Ok(tonic::Response::new(res)); } let cluster = self.client.fetch_cluster(true).await?; - let leader_addrs: Vec<_> = cluster - .nodes - .into_iter() - .filter_map(|node| (node.node_id == cluster.leader_id).then_some(node.addr)) - .collect(); + let Some(leader_meta) = cluster.nodes.into_iter().find_map(|node| { + (node.node_id == cluster.leader_id).then_some(node.into_parts().1) + }) else { + return Err(tonic::Status::internal("leader not found")); + }; if !self.lease_storage.is_primary() { - let endpoints = build_endpoints(&leader_addrs, self.client_tls_config.as_ref())?; + let endpoints = + build_endpoints(leader_meta.client_urls(), self.client_tls_config.as_ref())?; let channel = tonic::transport::Channel::balance_list(endpoints.into_iter()); let mut lease_client = LeaseClient::new(channel); return lease_client.lease_time_to_live(request).await; diff --git a/crates/xline/src/server/xline_server.rs b/crates/xline/src/server/xline_server.rs index 415869397..62c558c39 100644 --- a/crates/xline/src/server/xline_server.rs +++ b/crates/xline/src/server/xline_server.rs @@ -5,7 +5,7 @@ use clippy_utilities::{NumericCast, OverflowArithmetic}; use curp::{ client::ClientBuilder as CurpClientBuilder, member::{ClusterId, MembershipInfo}, - rpc::{InnerProtocolServer, 
ProtocolServer}, + rpc::{InnerProtocolServer, NodeMetadata, ProtocolServer}, server::{Rpc, DB as CurpDB}, }; use dashmap::DashMap; @@ -110,10 +110,18 @@ impl XlineServer { #[cfg(madsim)] let (client_tls_config, server_tls_config) = (None, None); let curp_storage = Arc::new(CurpDB::open(&cluster_config.curp_config().engine_cfg)?); - let membership_info = MembershipInfo::new( - *cluster_config.node_id(), - cluster_config.initial_membership_info().clone(), - ); + + let init_members = cluster_config + .initial_membership_info() + .clone() + .into_iter() + .map(|(name, conf)| { + let meta = + NodeMetadata::new(name, conf.peer_urls().clone(), conf.client_urls().clone()); + (*conf.id(), meta) + }) + .collect(); + let membership_info = MembershipInfo::new(*cluster_config.node_id(), init_members); Ok(Self { cluster_config, @@ -474,7 +482,7 @@ impl XlineServer { .init_members .values() .cloned() - .map(|addr| vec![addr]), + .map(NodeMetadata::into_peer_urls), ) .bypass(self.membership_info.node_id, curp_server.clone()) .build::()?, @@ -497,7 +505,9 @@ impl XlineServer { .init_members .get(&self.membership_info.node_id) .cloned() + .map(NodeMetadata::into_peer_urls) .into_iter() + .flatten() .collect(); Ok(( KvServer::new( diff --git a/crates/xline/src/utils/args.rs b/crates/xline/src/utils/args.rs index 928c405dc..7f575f041 100644 --- a/crates/xline/src/utils/args.rs +++ b/crates/xline/src/utils/args.rs @@ -1,9 +1,4 @@ -use std::{ - collections::{BTreeMap, HashMap}, - env, - path::PathBuf, - time::Duration, -}; +use std::{collections::HashMap, env, path::PathBuf, time::Duration}; use anyhow::Result; use clap::Parser; @@ -22,8 +17,8 @@ use utils::{ default_sync_victims_interval, default_watch_progress_notify_interval, AuthConfig, AutoCompactConfig, ClientConfig, ClusterConfig, CompactConfig, CurpConfigBuilder, EngineConfig, InitialClusterState, LevelConfig, LogConfig, MetricsConfig, - MetricsPushProtocol, RotationConfig, ServerTimeout, StorageConfig, TlsConfig, TraceConfig, - 
XlineServerConfig, + MetricsPushProtocol, NodeMetaConfig, RotationConfig, ServerTimeout, StorageConfig, + TlsConfig, TraceConfig, XlineServerConfig, }, parse_batch_bytes, parse_duration, parse_log_file, parse_log_level, parse_members, parse_membership, parse_metrics_push_protocol, parse_rotation, parse_state, ConfigFileError, @@ -220,7 +215,7 @@ pub struct ServerArgs { client_key_path: Option, /// Cluster membership. eg: 0=192.168.x.x:8080,1=192.168.x.x:8081 #[clap(long, value_parser = parse_membership)] - membership_info: BTreeMap, + membership_info: HashMap, /// The id of current node #[clap(long)] node_id: u64, From 9f8af841eb7759e9af8dc1193271221807903afc Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 18 Sep 2024 18:09:33 +0800 Subject: [PATCH 181/322] refactor: implement `parse_membership` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/utils/src/parser.rs | 86 ++++++++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 22 deletions(-) diff --git a/crates/utils/src/parser.rs b/crates/utils/src/parser.rs index e0047def1..98447dfa8 100644 --- a/crates/utils/src/parser.rs +++ b/crates/utils/src/parser.rs @@ -1,8 +1,4 @@ -use std::{ - collections::{BTreeMap, HashMap}, - path::PathBuf, - time::Duration, -}; +use std::{collections::HashMap, path::PathBuf, time::Duration}; use clippy_utilities::OverflowArithmetic; use regex::Regex; @@ -75,28 +71,47 @@ pub fn parse_members(s: &str) -> Result>, ConfigPars Ok(map) } -/// Parse members from string like "0=addr1,1=addr2,2=addr3" +/// Parse members from string like: +/// "`node1=id1#peer_url1,peer_url2#client_url1,client_url2;node2=id2#peer_url3,peer_url4#client_url3,client_url4`" /// /// # Errors /// /// Return error when pass wrong args #[inline] -#[allow(clippy::todo)] -pub fn parse_membership(_s: &str) -> Result, ConfigParseError> { - todo!() - //// TODO: currently reuse `parse_members`. 
Rewrite this after the old membership change is - //// removed. - //let ms = parse_members(s)?; - //ms.into_iter() - // .map(|(k, v)| { - // k.parse() - // .ok() - // .zip(v.into_iter().next()) - // .ok_or(ConfigParseError::InvalidValue( - // "parese membership error".to_owned(), - // )) - // }) - // .collect::>() +pub fn parse_membership(s: &str) -> Result, ConfigParseError> { + let parse_urls = |urls_str: &str| urls_str.split(',').map(str::to_owned).collect::>(); + let parse_meta = |meta_str: &str| { + let mut fields = meta_str.split('#'); + let id: u64 = fields + .next() + .ok_or_else(|| ConfigParseError::InvalidValue("node id not found".to_owned()))? + .parse() + .map_err(|e| ConfigParseError::InvalidValue(format!("node id parse failed: {e}")))?; + let peer_urls: Vec<_> = fields + .next() + .map(parse_urls) + .ok_or_else(|| ConfigParseError::InvalidValue("node peer_urls not found".to_owned()))?; + let client_urls: Vec<_> = fields.next().map(parse_urls).ok_or_else(|| { + ConfigParseError::InvalidValue("node client_urls not found".to_owned()) + })?; + + Ok::<_, ConfigParseError>((id, peer_urls, client_urls)) + }; + let parse_node = |node_str: &str| { + let mut node_split = node_str.split('='); + let name = node_split + .next() + .ok_or_else(|| ConfigParseError::InvalidValue("node name not found".to_owned()))? 
+ .to_owned(); + let (id, peer_urls, client_urls) = + node_split.next().map(parse_meta).ok_or_else(|| { + ConfigParseError::InvalidValue("node metadata not found".to_owned()) + })??; + + Ok::<_, ConfigParseError>((name, NodeMetaConfig::new(id, peer_urls, client_urls))) + }; + + s.split(';').map(parse_node).collect::>() } /// Parse `ClusterRange` from the given string @@ -472,4 +487,31 @@ mod test { assert!(parse_log_file(".../path/with-spaces/log_file.log-123.456-789").is_err()); assert!(parse_log_file("~~/path/with-spaces/log_file.log-123.456-789").is_err()); } + + #[test] + fn test_parse_membership() { + let arg = "node1=1#10.0.0.1:2380,10.0.0.1:2480#10.0.0.1:2379,10.0.0.2:2379;node2=2#10.0.0.3:2380#10.0.0.3:2379"; + let result = parse_membership(arg).unwrap(); + assert_eq!( + result, + HashMap::from([ + ( + "node1".to_owned(), + NodeMetaConfig::new( + 1, + vec!["10.0.0.1:2380".to_owned(), "10.0.0.1:2480".to_owned()], + vec!["10.0.0.1:2379".to_owned(), "10.0.0.2:2379".to_owned()], + ) + ), + ( + "node2".to_owned(), + NodeMetaConfig::new( + 2, + vec!["10.0.0.3:2380".to_owned()], + vec!["10.0.0.3:2379".to_owned()], + ) + ) + ]) + ); + } } From ce9b9d4fb54151393c6c8dffa387f38b4be8dae6 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 19 Sep 2024 10:28:54 +0800 Subject: [PATCH 182/322] refactor: api of NodeMetadata::new Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/rpc/mod.rs | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 549b5c66a..a581be62f 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -824,11 +824,22 @@ impl NodeMetadata { /// Creates a new `NodeMetadata` #[inline] #[must_use] - pub fn new(name: String, peer_urls: Vec, client_urls: Vec) -> Self { + pub fn new(name: N, peer_urls: AS, client_urls: AS) -> Self + where + N: AsRef, + A: AsRef, + AS: IntoIterator, 
+ { Self { - name, - peer_urls, - client_urls, + name: name.as_ref().to_owned(), + peer_urls: peer_urls + .into_iter() + .map(|s| s.as_ref().to_owned()) + .collect(), + client_urls: client_urls + .into_iter() + .map(|s| s.as_ref().to_owned()) + .collect(), } } From c20290bd1e2b164faff94fd9b8c2b2d90fc1fb0f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:30:54 +0800 Subject: [PATCH 183/322] fix: prevent deadlock while holding `RawCurp.st` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/mod.rs | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 50e7db88f..22515e7df 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -691,21 +691,23 @@ impl RawCurp { } // validate term and set leader id - let st_r = self.st.upgradable_read(); - match st_r.term.cmp(&term) { - std::cmp::Ordering::Less => { - let mut st_w = RwLockUpgradableReadGuard::upgrade(st_r); - self.update_to_term_and_become_follower(&mut st_w, term); - st_w.leader_id = Some(leader_id); - } - std::cmp::Ordering::Equal => { - if st_r.leader_id.is_none() { + { + let st_r = self.st.upgradable_read(); + match st_r.term.cmp(&term) { + std::cmp::Ordering::Less => { let mut st_w = RwLockUpgradableReadGuard::upgrade(st_r); + self.update_to_term_and_become_follower(&mut st_w, term); st_w.leader_id = Some(leader_id); } - } - std::cmp::Ordering::Greater => { - return Err((st_r.term, self.log.read().commit_index + 1)) + std::cmp::Ordering::Equal => { + if st_r.leader_id.is_none() { + let mut st_w = RwLockUpgradableReadGuard::upgrade(st_r); + st_w.leader_id = Some(leader_id); + } + } + std::cmp::Ordering::Greater => { + return Err((st_r.term, self.log.read().commit_index + 1)) + } } } self.reset_election_tick(); From 
6e9d12d0bda826f3717c48f6363aac9ab1d3eccb Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:52:26 +0800 Subject: [PATCH 184/322] fix: use dummy id for initial cluster fetch Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 42e15ff9f..fb5b38b60 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -245,13 +245,15 @@ impl ClientBuilder { } /// Connect to members + #[allow(clippy::as_conversions)] // convert usize to u64 is legal fn connect_members(&self, tls_config: Option<&ClientTlsConfig>) -> ClusterStateInit { let all_members = self .init_nodes .clone() .unwrap_or_else(|| unreachable!("requires members")) .into_iter() - .map(|addrs| (0, addrs)) + .enumerate() + .map(|(dummy_id, addrs)| (dummy_id as u64, addrs)) .collect(); let connects = rpc::connects(all_members, tls_config) .map(|(_id, conn)| conn) From 8cedd5d3ee5a5cbe670bffbd60dbf0177e580f04 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 19 Sep 2024 16:05:07 +0800 Subject: [PATCH 185/322] fix: initial membership state - set `index_effective` to 0 to prevent truncation of effective - set `committed` to the initial config Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 29d1b6c14..d7d0255ed 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -63,9 +63,10 @@ impl NodeMembershipState { let node_id = info.node_id; let init_ms = info.into_membership(); let cluster_state = MembershipState { - effective: init_ms, - index_effective: 1, - committed: Membership::default(), + effective: init_ms.clone(), + index_effective: 
0, + // The initial configuration considered as committed + committed: init_ms, }; Self { node_id, From 7603cee1a494f93b6de4c695ffb25cfbc6020fe0 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 19 Sep 2024 16:51:15 +0800 Subject: [PATCH 186/322] fix: replace `HashMap` with `BTreeMap` in `CurpGroup` `CurpGroup` use Iterator::enumerate to generate node ids and this requires consistent orders. Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/tests/it/common/curp_group.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index 471ee65f3..b36ed1155 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -109,7 +109,7 @@ impl CurpGroup { } async fn new_with_configs( - configs: HashMap, EngineConfig)>, + configs: BTreeMap, EngineConfig)>, leader_name: String, ) -> Self { let n_nodes = configs.len(); @@ -188,7 +188,7 @@ impl CurpGroup { } } - async fn gen_listeners(keys: impl Iterator) -> HashMap { + async fn gen_listeners(keys: impl Iterator) -> BTreeMap { join_all( keys.cloned() .map(|name| async { (name, TcpListener::bind("0.0.0.0:0").await.unwrap()) }), @@ -199,8 +199,8 @@ impl CurpGroup { } fn listeners_to_all_members_addrs( - listeners: &HashMap, - ) -> HashMap> { + listeners: &BTreeMap, + ) -> BTreeMap> { listeners .iter() .map(|(name, listener)| { From 4f99db74b95c24e1c2e8b6740900a120bd0ff120 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 19 Sep 2024 18:19:49 +0800 Subject: [PATCH 187/322] fix: membership calculation - remove member should not remove from nodes - in `all_coherent`, `iter::successors` should skip the initial membership Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) 
diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index d7d0255ed..16e8d2723 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -2,7 +2,6 @@ use std::collections::btree_map::Entry; use std::collections::hash_map::DefaultHasher; use std::collections::BTreeMap; use std::collections::BTreeSet; -use std::collections::HashSet; use std::hash::Hash; use std::hash::Hasher; use std::iter; @@ -244,26 +243,16 @@ impl Membership { let next = Self::next_coherent(current, target.clone()); (current != &next).then_some(next) }) + .skip(1) .collect() } /// Generates a new coherent membership from a quorum set fn next_coherent(ms: &Self, set: BTreeSet) -> Self { let next = ms.as_joint_owned().coherent(set).into_inner(); - let original_ids = ms - .members - .iter() - .flat_map(BTreeSet::iter) - .collect::>(); - let next_ids = next.iter().flat_map(BTreeSet::iter).collect::>(); - let mut nodes = ms.nodes.clone(); - for id in original_ids.difference(&next_ids) { - let _ignore = nodes.remove(id); - } - Self { members: next, - nodes, + nodes: ms.nodes.clone(), } } From 787609b546ec4d54e0c1d1193afb600666d7523d Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 19 Sep 2024 19:08:46 +0800 Subject: [PATCH 188/322] fix: node role update - leader should update role to learner when removed - a node should update it's leader id when the leader is removed Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 7 ++++++- crates/curp/src/server/curp_node/member_impl.rs | 1 + crates/curp/src/server/raw_curp/member_impl.rs | 13 ++++++++++++- crates/curp/src/server/raw_curp/mod.rs | 4 ++-- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 16e8d2723..70f904618 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -89,10 +89,15 @@ impl NodeMembershipState { } /// Returns `true` if the current node 
is a member of the cluster - pub(crate) fn is_member(&self) -> bool { + pub(crate) fn is_self_member(&self) -> bool { self.cluster().effective().contains(self.node_id()) } + /// Returns `true` if the given node is a member of the cluster + pub(crate) fn is_member(&self, id: u64) -> bool { + self.cluster().effective().contains(id) + } + /// Returns `true` if the given node is a member of the cluster pub(crate) fn check_membership(&self, node_id: u64) -> bool { self.cluster().effective().contains(node_id) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index e7b6360a8..717b02eac 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -94,6 +94,7 @@ impl, RC: RoleChange> CurpNode { let propose_id = self.curp.update_membership(config, spawn_sync)?; self.curp.wait_propose_ids(Some(propose_id)).await; } + self.curp.update_role_leader(); Ok(()) } diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 2f195cda3..715e84212 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -61,6 +61,12 @@ impl RawCurp { Ok(propose_id) } + /// Updates the role if the node is leader + pub(crate) fn update_role_leader(&self) { + let ms_r = self.ms.read(); + self.update_role(&ms_r); + } + /// Append membership entries pub(crate) fn append_membership( &self, @@ -107,13 +113,18 @@ impl RawCurp { /// Updates the role of the node based on the current membership state fn update_role(&self, current: &NodeMembershipState) { let mut st_w = self.st.write(); - if current.is_member() { + if current.is_self_member() { if matches!(st_w.role, Role::Learner) { st_w.role = Role::Follower; } } else { st_w.role = Role::Learner; } + + // updates leader id + if st_w.leader_id.map_or(false, |id| !current.is_member(id)) { + st_w.leader_id = None; + } } /// Creates 
connections for new membership configuration. diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 22515e7df..5912917bd 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1133,7 +1133,7 @@ impl RawCurp { if st_w.role == Role::Leader { return None; } - if !self.ms.read().is_member() { + if !self.ms.read().is_self_member() { return None; } let mut cst_l = self.cst.lock(); @@ -1166,7 +1166,7 @@ impl RawCurp { /// Retruns `true` if the current node is a learner #[inline] pub fn is_learner(&self) -> bool { - !self.ms.read().is_member() + !self.ms.read().is_self_member() } #[cfg(test)] From c8753ebf5a513ea05511a31bfeeb0fb45a446faa Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 20 Sep 2024 09:36:10 +0800 Subject: [PATCH 189/322] fix: ensures leader in membership request handling Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/member_impl.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 717b02eac..1cdae258a 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -17,6 +17,7 @@ use crate::rpc::AddLearnerResponse; use crate::rpc::AddMemberRequest; use crate::rpc::AddMemberResponse; use crate::rpc::CurpError; +use crate::rpc::Redirect; use crate::rpc::RemoveLearnerRequest; use crate::rpc::RemoveLearnerResponse; use crate::rpc::RemoveMemberRequest; @@ -30,6 +31,7 @@ impl, RC: RoleChange> CurpNode { &self, request: AddLearnerRequest, ) -> Result { + self.ensure_leader()?; let node_ids = self.curp.new_node_ids(request.nodes.len()); let ids_with_meta = node_ids.clone().into_iter().zip(request.nodes).collect(); @@ -44,6 +46,7 @@ impl, RC: RoleChange> CurpNode { &self, request: RemoveLearnerRequest, ) -> 
Result { + self.ensure_leader()?; self.update_and_wait(Change::RemoveLearner(request.node_ids)) .await?; @@ -55,6 +58,7 @@ impl, RC: RoleChange> CurpNode { &self, request: AddMemberRequest, ) -> Result { + self.ensure_leader()?; self.update_and_wait(Change::AddMember(request.node_ids)) .await?; @@ -66,6 +70,7 @@ impl, RC: RoleChange> CurpNode { &self, request: RemoveMemberRequest, ) -> Result { + self.ensure_leader()?; self.update_and_wait(Change::RemoveMember(request.node_ids)) .await?; @@ -98,4 +103,16 @@ impl, RC: RoleChange> CurpNode { Ok(()) } + + /// Ensures that the current node is the leader + fn ensure_leader(&self) -> Result<(), CurpError> { + let (leader_id, term, is_leader) = self.curp.leader(); + if is_leader { + return Ok(()); + } + Err(CurpError::Redirect(Redirect { + leader_id: leader_id.map(Into::into), + term, + })) + } } From 8766fdd9dec66083b7d4de6dd35ce6e32b21a36d Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 20 Sep 2024 09:55:07 +0800 Subject: [PATCH 190/322] chore: remove unused field in `ClientBuilder` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 20 -------------------- crates/curp/tests/it/server.rs | 7 ++----- 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index fb5b38b60..d56fd5e0b 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -106,14 +106,10 @@ impl Drop for ProposeIdGuard<'_> { #[derive(Debug, Clone, Default)] #[allow(clippy::module_name_repetitions)] // better than just Builder pub struct ClientBuilder { - /// initial cluster version - cluster_version: Option, /// initial cluster members init_nodes: Option>>, /// is current client send request to raw curp server is_raw_curp: bool, - /// initial leader state - leader_state: Option<(ServerId, u64)>, /// client configuration config: ClientConfig, /// Client tls config @@ -159,14 
+155,6 @@ impl ClientBuilder { } } - /// Set the initial cluster version - #[inline] - #[must_use] - pub fn cluster_version(mut self, cluster_version: u64) -> Self { - self.cluster_version = Some(cluster_version); - self - } - /// Set the initial nodes #[inline] #[must_use] @@ -175,14 +163,6 @@ impl ClientBuilder { self } - /// Set the initial leader state - #[inline] - #[must_use] - pub fn leader_state(mut self, leader_id: ServerId, term: u64) -> Self { - self.leader_state = Some((leader_id, term)); - self - } - /// Set the tls config #[inline] #[must_use] diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index 6019e5260..a2af8b122 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -3,10 +3,7 @@ use std::time::Duration; use clippy_utilities::NumericCast; -use curp::{ - client::{ClientApi, ClientBuilder}, - rpc::CurpError, -}; +use curp::{client::ClientApi, rpc::CurpError}; use curp_test_utils::{ init_logger, test_cmd::{TestCommand, TestCommandResult}, @@ -15,7 +12,6 @@ use futures::stream::FuturesUnordered; use madsim::rand::{thread_rng, Rng}; use test_macros::abort_on_panic; use tokio_stream::StreamExt; -use utils::config::ClientConfig; use crate::common::curp_group::{CurpGroup, DEFAULT_SHUTDOWN_TIMEOUT}; @@ -425,6 +421,7 @@ async fn propose_update_node_should_success() { assert!(member.is_some_and(|m| m.peer_urls == ["new_addr"])); } +#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_leader() { From b23cb2dbddde6adf40ac7f0d454844bc2e5dc6b0 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 20 Sep 2024 10:24:56 +0800 Subject: [PATCH 191/322] refactor: allow setting full cluster metadata in `ClientBuilder` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 97 +++++++++++++++++++++++++++-------- 
crates/curp/src/member.rs | 1 - 2 files changed, 77 insertions(+), 21 deletions(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index d56fd5e0b..e22fe3e77 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -60,11 +60,12 @@ use self::{ unary::Unary, }; use crate::{ + member::Membership, members::ServerId, rpc::{ self, connect::{BypassedConnect, ConnectApi}, - FetchMembershipResponse, ProposeId, Protocol, + FetchMembershipResponse, NodeMetadata, ProposeId, Protocol, }, server::StreamingProtocol, tracker::Tracker, @@ -102,12 +103,28 @@ impl Drop for ProposeIdGuard<'_> { } } +/// Sets the initial cluster for the client builder +#[derive(Debug, Clone)] +enum SetCluster { + /// Some nodes, used for discovery + Nodes(Vec>), + /// Full cluster metadata + Full { + /// The leader id + leader_id: u64, + /// The term of current cluster + term: u64, + /// The cluster members + members: HashMap>, + }, +} + /// Client builder to build a client #[derive(Debug, Clone, Default)] #[allow(clippy::module_name_repetitions)] // better than just Builder pub struct ClientBuilder { /// initial cluster members - init_nodes: Option>>, + init_cluster: Option, /// is current client send request to raw curp server is_raw_curp: bool, /// client configuration @@ -155,11 +172,28 @@ impl ClientBuilder { } } + /// Set the initial cluster + #[inline] + #[must_use] + pub fn init_cluster( + mut self, + leader_id: u64, + term: u64, + members: impl IntoIterator)>, + ) -> Self { + self.init_cluster = Some(SetCluster::Full { + leader_id, + term, + members: members.into_iter().collect(), + }); + self + } + /// Set the initial nodes #[inline] #[must_use] pub fn init_nodes(mut self, nodes: impl IntoIterator>) -> Self { - self.init_nodes = Some(nodes.into_iter().collect()); + self.init_cluster = Some(SetCluster::Nodes(nodes.into_iter().collect())); self } @@ -226,20 +260,43 @@ impl ClientBuilder { /// Connect to members 
#[allow(clippy::as_conversions)] // convert usize to u64 is legal - fn connect_members(&self, tls_config: Option<&ClientTlsConfig>) -> ClusterStateInit { - let all_members = self - .init_nodes + fn connect_members(&self, tls_config: Option<&ClientTlsConfig>) -> ClusterState { + match self + .init_cluster .clone() - .unwrap_or_else(|| unreachable!("requires members")) - .into_iter() - .enumerate() - .map(|(dummy_id, addrs)| (dummy_id as u64, addrs)) - .collect(); - let connects = rpc::connects(all_members, tls_config) - .map(|(_id, conn)| conn) - .collect(); - - ClusterStateInit::new(connects) + .unwrap_or_else(|| unreachable!("requires cluster to be set")) + { + SetCluster::Nodes(nodes) => { + let nodes = nodes + .into_iter() + .enumerate() + .map(|(dummy_id, addrs)| (dummy_id as u64, addrs)) + .collect(); + let connects = rpc::connects(nodes, tls_config) + .map(|(_id, conn)| conn) + .collect(); + + ClusterState::Init(ClusterStateInit::new(connects)) + } + SetCluster::Full { + leader_id, + term, + members, + } => { + let connects = rpc::connects(members.clone(), tls_config).collect(); + let member_ids = members.keys().copied().collect(); + let metas = members + .clone() + .into_iter() + .map(|(id, addrs)| (id, NodeMetadata::new("", addrs.clone(), addrs))) + .collect(); + let membership = Membership::new(vec![member_ids], metas); + let cluster_state = + cluster_state::ClusterStateReady::new(leader_id, term, connects, membership); + + ClusterState::Ready(cluster_state) + } + } } /// Build the client @@ -258,13 +315,13 @@ impl ClientBuilder { *self.config.wait_synced_timeout(), self.build_connect_to(None), ); - let cluster_state_init = self.connect_members(self.tls_config.as_ref()); + let cluster_state = self.connect_members(self.tls_config.as_ref()); let client = Retry::new( Unary::new(config), self.init_retry_config(), keep_alive, fetch, - ClusterState::Init(cluster_state_init), + cluster_state, ); Ok(client) @@ -328,13 +385,13 @@ impl ClientBuilderWithBypass

{ *self.inner.config.wait_synced_timeout(), self.inner.build_connect_to(Some(bypassed)), ); - let cluster_state_init = self.inner.connect_members(self.inner.tls_config.as_ref()); + let cluster_state = self.inner.connect_members(self.inner.tls_config.as_ref()); let client = Retry::new( Unary::new(config), self.inner.init_retry_config(), keep_alive, fetch, - ClusterState::Init(cluster_state_init), + cluster_state, ); Ok(client) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 70f904618..f3a69fe4c 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -183,7 +183,6 @@ pub(crate) struct Membership { } impl Membership { - #[cfg(test)] /// Creates a new `Membership` pub(crate) fn new(members: Vec>, nodes: BTreeMap) -> Self { Self { members, nodes } From 325960968e56e3b0fa98278ff9b9368a25388dd3 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 19 Sep 2024 10:54:07 +0800 Subject: [PATCH 192/322] test: rewrite membership change related tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/tests/it/server.rs | 331 ++++++++++++++++----------------- 1 file changed, 165 insertions(+), 166 deletions(-) diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index a2af8b122..f2529f71e 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -1,19 +1,30 @@ //! 
Integration test for the curp server -use std::time::Duration; +use std::{ + collections::{BTreeMap, BTreeSet}, + time::Duration, +}; use clippy_utilities::NumericCast; -use curp::{client::ClientApi, rpc::CurpError}; +use curp::{ + client::{ClientApi, ClientBuilder}, + member::MembershipInfo, + rpc::{CurpError, NodeMetadata}, +}; use curp_test_utils::{ - init_logger, - test_cmd::{TestCommand, TestCommandResult}, + init_logger, sleep_millis, + test_cmd::{TestCommand, TestCommandResult, TestCommandType}, }; use futures::stream::FuturesUnordered; use madsim::rand::{thread_rng, Rng}; use test_macros::abort_on_panic; +use tokio::net::TcpListener; use tokio_stream::StreamExt; +use utils::config::ClientConfig; -use crate::common::curp_group::{CurpGroup, DEFAULT_SHUTDOWN_TIMEOUT}; +use crate::common::curp_group::{ + commandpb::FetchMembershipRequest, CurpGroup, DEFAULT_SHUTDOWN_TIMEOUT, +}; #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] @@ -327,7 +338,53 @@ async fn shutdown_rpc_should_shutdown_the_cluster() { } } -#[cfg(ignore)] // TODO: Rewrite this tests +struct NodeAssert { + id: u64, + meta: NodeMetadata, + is_member: bool, +} + +impl NodeAssert { + fn new(id: u64, meta: NodeMetadata, is_member: bool) -> Self { + Self { + id, + meta, + is_member, + } + } +} + +async fn assert_cluster( + client: &impl ClientApi, + num_nodes: usize, + num_members: usize, + node_asserts: NS, +) where + NS: IntoIterator, +{ + let resp = loop { + // workaround for client id expires on new leader + if let Ok(resp) = client.fetch_cluster(true).await { + break resp; + } + }; + let member_ids: BTreeSet<_> = resp.members.into_iter().flat_map(|t| t.set).collect(); + assert_eq!(resp.nodes.len(), num_nodes); + assert_eq!(member_ids.len(), num_members); + for node_assert in node_asserts { + let node = resp + .nodes + .iter() + .find(|n| n.node_id == node_assert.id) + .expect("node not found in fetch cluster response"); + assert_eq!(node.meta, Some(node_assert.meta), "node meta not 
match"); + assert_eq!( + node_assert.is_member, + member_ids.iter().any(|i| *i == node_assert.id) + ); + } +} + #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn propose_add_node_should_success() { @@ -336,92 +393,85 @@ async fn propose_add_node_should_success() { let group = CurpGroup::new(3).await; let client = group.new_client().await; - let node_id = - ClusterInfo::calculate_member_id(vec!["address".to_owned()], "", Some(timestamp())); - let changes = vec![ConfChange::add(node_id, vec!["address".to_string()])]; - let res = client.propose_conf_change(changes).await; - let members = res.unwrap(); - assert_eq!(members.len(), 4); - assert!(members.iter().any(|m| m.id == node_id)); + let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); + let id = client.add_learner(vec![node_meta.clone()]).await.unwrap()[0]; + assert_cluster(&client, 4, 3, [NodeAssert::new(id, node_meta, false)]).await; } -#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] -async fn propose_remove_follower_should_success() { +async fn propose_remove_node_should_success() { init_logger(); - let group = CurpGroup::new(5).await; + let group = CurpGroup::new(3).await; let client = group.new_client().await; - let leader_id = group.get_leader().await.0; - let follower_id = *group.nodes.keys().find(|&id| &leader_id != id).unwrap(); - let changes = vec![ConfChange::remove(follower_id)]; - let members = client.propose_conf_change(changes).await.unwrap(); - assert_eq!(members.len(), 4); - assert!(members.iter().all(|m| m.id != follower_id)); - sleep_secs(7).await; // wait the removed node start election and detect it is removed - assert!(group - .nodes - .get(&follower_id) - .unwrap() - .task_manager - .is_finished()); - // check if the old client can propose to the new cluster - client - .propose(&TestCommand::new_get(vec![1]), None, true) - .await - .unwrap() - .unwrap(); + let node_meta = NodeMetadata::new("new_node", 
["addr"], ["addr"]); + let id = client.add_learner(vec![node_meta.clone()]).await.unwrap()[0]; + assert_cluster(&client, 4, 3, [NodeAssert::new(id, node_meta, false)]).await; + + client.remove_learner(vec![id]).await.unwrap(); + assert_cluster(&client, 3, 3, []).await; } -#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] -async fn propose_remove_leader_should_success() { +async fn propose_add_member_should_success() { init_logger(); - let group = CurpGroup::new(5).await; + let group = CurpGroup::new(3).await; let client = group.new_client().await; - let leader_id = group.get_leader().await.0; - let changes = vec![ConfChange::remove(leader_id)]; - let members = client.propose_conf_change(changes).await.unwrap(); - assert_eq!(members.len(), 4); - assert!(members.iter().all(|m| m.id != leader_id)); - sleep_secs(7).await; // wait for the new leader to be elected - assert!(group - .nodes - .get(&leader_id) - .unwrap() - .task_manager - .is_finished()); - let new_leader_id = group.get_leader().await.0; - assert_ne!(new_leader_id, leader_id); - // check if the old client can propose to the new cluster - client - .propose(&TestCommand::new_get(vec![1]), None, true) - .await - .unwrap() - .unwrap(); + + let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); + let id = client.add_learner(vec![node_meta.clone()]).await.unwrap()[0]; + assert_cluster(&client, 4, 3, [NodeAssert::new(id, node_meta, false)]).await; + + client.add_member(vec![id]).await.unwrap(); + assert_cluster(&client, 4, 4, []).await; +} + +#[tokio::test(flavor = "multi_thread")] +#[abort_on_panic] +async fn propose_remove_member_should_success() { + init_logger(); + + let group = CurpGroup::new(3).await; + let client = group.new_client().await; + + let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); + let id = client.add_learner(vec![node_meta.clone()]).await.unwrap()[0]; + assert_cluster(&client, 4, 3, [NodeAssert::new(id, 
node_meta, false)]).await; + + client.add_member(vec![id]).await.unwrap(); + assert_cluster(&client, 4, 4, []).await; + + client.remove_member(vec![id]).await.unwrap(); + assert_cluster(&client, 4, 3, []).await; + + client.remove_learner(vec![id]).await.unwrap(); + assert_cluster(&client, 3, 3, []).await; } -#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] -async fn propose_update_node_should_success() { +async fn propose_remove_leader_should_success() { init_logger(); - let group = CurpGroup::new(5).await; + let group = CurpGroup::new(3).await; let client = group.new_client().await; - let node_id = group.nodes.keys().next().copied().unwrap(); - let changes = vec![ConfChange::update(node_id, vec!["new_addr".to_owned()])]; - let members = client.propose_conf_change(changes).await.unwrap(); - assert_eq!(members.len(), 5); - let member = members.iter().find(|m| m.id == node_id); - assert!(member.is_some_and(|m| m.peer_urls == ["new_addr"])); + + let id = client.fetch_leader_id(true).await.unwrap(); + + client.remove_member(vec![id]).await.unwrap(); + assert_cluster(&client, 3, 2, []).await; + + client.remove_learner(vec![id]).await.unwrap(); + assert_cluster(&client, 2, 2, []).await; + + let new_id = client.fetch_leader_id(true).await.unwrap(); + assert_ne!(id, new_id); } -#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_leader() { @@ -433,10 +483,10 @@ async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_leader() let follower_id = *group.nodes.keys().find(|&id| &leader_id != id).unwrap(); // build a client and set a wrong leader id let client = ClientBuilder::new(ClientConfig::default(), true) - .leader_state(follower_id, 0) - .init_nodes(group.all_addrs_map().values().cloned()) + .init_cluster(follower_id, 0, group.all_addrs_map()) .build::() .unwrap(); + 
client.propose_shutdown().await.unwrap(); group @@ -444,32 +494,28 @@ async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_leader() .await; } -#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn propose_conf_change_to_follower() { init_logger(); - let group = CurpGroup::new(5).await; + let group = CurpGroup::new(3).await; let leader_id = group.get_leader().await.0; let follower_id = *group.nodes.keys().find(|&id| &leader_id != id).unwrap(); // build a client and set a wrong leader id let client = ClientBuilder::new(ClientConfig::default(), true) - .leader_state(follower_id, 0) - .all_members(group.all_addrs_map()) + .init_cluster(follower_id, 0, group.all_addrs_map()) .build::() .unwrap(); - let node_id = group.nodes.keys().next().copied().unwrap(); - let changes = vec![ConfChange::update(node_id, vec!["new_addr".to_owned()])]; - let members = client.propose_conf_change(changes).await.unwrap(); - assert_eq!(members.len(), 5); - let member = members.iter().find(|m| m.id == node_id); - assert!(member.is_some_and(|m| m.peer_urls == ["new_addr"])); + let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); + let id = client.add_learner(vec![node_meta.clone()]).await.unwrap()[0]; + assert_cluster(&client, 4, 3, [NodeAssert::new(id, node_meta, false)]).await; } -#[cfg(ignore)] // TODO: Rewrite this tests -async fn check_new_node(is_learner: bool) { +#[tokio::test(flavor = "multi_thread")] +#[abort_on_panic] +async fn new_node_should_apply_old_cluster_logs() { init_logger(); let mut group = CurpGroup::new(3).await; @@ -480,44 +526,41 @@ async fn check_new_node(is_learner: bool) { let listener = TcpListener::bind("0.0.0.0:0").await.unwrap(); let addr = listener.local_addr().unwrap().to_string(); let addrs = vec![addr.clone()]; - let node_id = ClusterInfo::calculate_member_id(addrs.clone(), "", Some(123)); - let changes = if is_learner { - vec![ConfChange::add_learner(node_id, 
addrs.clone())] - } else { - vec![ConfChange::add(node_id, addrs.clone())] - }; - - let members = client.propose_conf_change(changes).await.unwrap(); - assert_eq!(members.len(), 4); - assert!(members.iter().any(|m| m.id == node_id)); + let node_meta = NodeMetadata::new("new_node", addrs.clone(), addrs); + let node_id = client.add_learner(vec![node_meta]).await.unwrap()[0]; /******* start new node *******/ - // 1. fetch cluster from other nodes - let cluster_info = Arc::new(group.fetch_cluster_info(&[addr], "new_node").await); - - // 2. start new node + // 1. start new node group - .run_node(listener, "new_node".to_owned(), cluster_info) + .run_node( + listener, + "new_node".to_owned(), + MembershipInfo::new(node_id, BTreeMap::default()), + ) .await; - sleep_millis(500).await; // wait new node publish it's name to cluster - // 3. fetch and check cluster from new node + sleep_millis(500).await; // wait for membership sync + + // 2. fetch and check cluster from new node let mut new_connect = group.get_connect(&node_id).await; let res = new_connect - .fetch_cluster(tonic::Request::new(FetchClusterRequest { - linearizable: false, - })) + .fetch_membership(FetchMembershipRequest {}) .await .unwrap() .into_inner(); - assert_eq!(res.members.len(), 4); + assert_eq!(res.nodes.len(), 4); assert!(res + .nodes + .iter() + .any(|m| m.node_id == node_id && m.meta.as_ref().unwrap().name == "new_node")); + assert!(!res .members .iter() - .any(|m| m.id == node_id && m.name == "new_node" && is_learner == m.is_learner)); + .flat_map(|s| &s.set) + .any(|m| *m == node_id)); - // 4. check if the new node syncs the command from old cluster + // 3. check if the new node syncs the command from old cluster let new_node = group.nodes.get_mut(&node_id).unwrap(); let (cmd, _) = new_node.as_rx.recv().await.unwrap(); assert_eq!( @@ -529,7 +572,7 @@ async fn check_new_node(is_learner: bool) { } ); - // 5. check if the old client can propose to the new cluster + // 4. 
check if the old client can propose to the new cluster client .propose(&TestCommand::new_get(vec![1]), None, true) .await @@ -537,21 +580,6 @@ async fn check_new_node(is_learner: bool) { .unwrap(); } -#[cfg(ignore)] // TODO: Rewrite this tests -#[tokio::test(flavor = "multi_thread")] -#[abort_on_panic] -async fn new_follower_node_should_apply_old_cluster_logs() { - check_new_node(false).await; -} - -#[cfg(ignore)] // TODO: Rewrite this tests -#[tokio::test(flavor = "multi_thread")] -#[abort_on_panic] -async fn new_learner_node_should_apply_old_cluster_logs() { - check_new_node(true).await; -} - -#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_cluster() { @@ -562,16 +590,17 @@ async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_cluster( let listener = TcpListener::bind("0.0.0.0:0").await.unwrap(); let addrs = vec![listener.local_addr().unwrap().to_string()]; - let node_id = ClusterInfo::calculate_member_id(addrs.clone(), "", Some(123)); - let changes = vec![ConfChange::add(node_id, addrs.clone())]; - let members = client.propose_conf_change(changes).await.unwrap(); - assert_eq!(members.len(), 4); - assert!(members.iter().any(|m| m.id == node_id)); - let cluster_info = Arc::new(group.fetch_cluster_info(&addrs, "new_node").await); + let node_meta = NodeMetadata::new("new_node", addrs.clone(), addrs); + let node_id = client.add_learner(vec![node_meta]).await.unwrap()[0]; group - .run_node(listener, "new_node".to_owned(), cluster_info) + .run_node( + listener, + "new_node".to_owned(), + MembershipInfo::new(node_id, BTreeMap::default()), + ) .await; + client.propose_shutdown().await.unwrap(); group @@ -579,7 +608,6 @@ async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_cluster( .await; } -#[cfg(ignore)] // TODO: Rewrite this tests #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn 
propose_conf_change_rpc_should_work_when_client_has_wrong_cluster() { @@ -590,50 +618,21 @@ async fn propose_conf_change_rpc_should_work_when_client_has_wrong_cluster() { let listener = TcpListener::bind("0.0.0.0:0").await.unwrap(); let addrs = vec![listener.local_addr().unwrap().to_string()]; - let node_id = ClusterInfo::calculate_member_id(addrs.clone(), "", Some(123)); - let changes = vec![ConfChange::add(node_id, addrs.clone())]; - let members = client.propose_conf_change(changes).await.unwrap(); - assert_eq!(members.len(), 4); - assert!(members.iter().any(|m| m.id == node_id)); - let cluster_info = Arc::new(group.fetch_cluster_info(&addrs, "new_node").await); + let node_meta = NodeMetadata::new("new_node", addrs.clone(), addrs); + let node_id = client.add_learner(vec![node_meta]).await.unwrap()[0]; group - .run_node(listener, "new_node".to_owned(), cluster_info) + .run_node( + listener, + "new_node".to_owned(), + MembershipInfo::new(node_id, BTreeMap::default()), + ) .await; - let changes = vec![ConfChange::remove(node_id)]; - let members = client.propose_conf_change(changes).await.unwrap(); - assert_eq!(members.len(), 3); - assert!(members.iter().all(|m| m.id != node_id)); + client.remove_member(vec![node_id]).await.unwrap(); group .wait_for_node_shutdown(node_id, DEFAULT_SHUTDOWN_TIMEOUT) .await; } -#[cfg(ignore)] // TODO: Rewrite this tests -#[tokio::test(flavor = "multi_thread")] -#[abort_on_panic] -async fn fetch_read_state_rpc_should_work_when_client_has_wrong_cluster() { - init_logger(); - let tmp_path = tempfile::TempDir::new().unwrap().into_path(); - let mut group = CurpGroup::new_rocks(3, tmp_path.clone()).await; - let client = group.new_client().await; - - let listener = TcpListener::bind("0.0.0.0:0").await.unwrap(); - let addrs = vec![listener.local_addr().unwrap().to_string()]; - let node_id = ClusterInfo::calculate_member_id(addrs.clone(), "", Some(123)); - let changes = vec![ConfChange::add(node_id, addrs.clone())]; - let members = 
client.propose_conf_change(changes).await.unwrap(); - assert_eq!(members.len(), 4); - assert!(members.iter().any(|m| m.id == node_id)); - let cluster_info = Arc::new(group.fetch_cluster_info(&addrs, "new_node").await); - group - .run_node(listener, "new_node".to_owned(), cluster_info) - .await; - - let cmd = TestCommand::new_get(vec![0]); - let res = client.fetch_read_state(&cmd).await; - assert!(res.is_ok()); -} - #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn move_leader_should_move_leadership_to_target_node() { From 729d3b5426b38456bf225e6e6e98b09521770cf7 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 20 Sep 2024 15:04:12 +0800 Subject: [PATCH 193/322] fix: leader will reset transferee after the transferee has been removed Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/member_impl.rs | 1 + crates/curp/src/server/raw_curp/member_impl.rs | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 1cdae258a..73724dcf5 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -100,6 +100,7 @@ impl, RC: RoleChange> CurpNode { self.curp.wait_propose_ids(Some(propose_id)).await; } self.curp.update_role_leader(); + self.curp.update_transferee(); Ok(()) } diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 715e84212..06ba90799 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -7,6 +7,7 @@ use curp_external_api::role_change::RoleChange; use curp_external_api::LogIndex; use event_listener::Event; use rand::Rng; +use utils::parking_lot_lock::RwLockMap; use crate::log_entry::EntryData; use crate::log_entry::LogEntry; @@ -67,6 +68,16 @@ impl RawCurp { 
self.update_role(&ms_r); } + /// Updates the role if the node is leader + pub(crate) fn update_transferee(&self) { + let Some(transferee) = self.lst.get_transferee() else { + return; + }; + if !self.ms.map_read(|ms| ms.is_member(transferee)) { + self.lst.reset_transferee(); + } + } + /// Append membership entries pub(crate) fn append_membership( &self, From 1d2f7314c43b07c970a96aeb3a411c0796a95765 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 20 Sep 2024 16:52:02 +0800 Subject: [PATCH 194/322] fix: always commit membership index in `append_membership` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/member_impl.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 06ba90799..bbae200ff 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -104,11 +104,11 @@ impl RawCurp { for (index, config) in configs { self.on_membership_update(&config, &spawn_sync); ms_w.cluster_mut().append(index, config); - ms_w.cluster_mut().commit(commit_index.min(index)); self.ctx .curp_storage .put_membership(ms_w.node_id(), ms_w.cluster())?; } + ms_w.cluster_mut().commit(commit_index); self.update_role(&ms_w); From cf4bc7011dcf86f2652a975a4032eb5be2afe046 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 20 Sep 2024 17:18:21 +0800 Subject: [PATCH 195/322] fix: behavior of `Membership::contains` The original `Membership::contains` does not checks the node's membership. 
Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 15 +++++---------- crates/curp/src/server/raw_curp/mod.rs | 4 ++-- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index f3a69fe4c..d0a75cb7d 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -90,17 +90,12 @@ impl NodeMembershipState { /// Returns `true` if the current node is a member of the cluster pub(crate) fn is_self_member(&self) -> bool { - self.cluster().effective().contains(self.node_id()) + self.cluster().effective().contains_member(self.node_id()) } /// Returns `true` if the given node is a member of the cluster pub(crate) fn is_member(&self, id: u64) -> bool { - self.cluster().effective().contains(id) - } - - /// Returns `true` if the given node is a member of the cluster - pub(crate) fn check_membership(&self, node_id: u64) -> bool { - self.cluster().effective().contains(node_id) + self.cluster().effective().contains_member(id) } /// Returns all member ids @@ -295,9 +290,9 @@ impl Membership { }) } - /// Returns `true` if the membership contains the given node id - pub(crate) fn contains(&self, node_id: u64) -> bool { - self.nodes.contains_key(&node_id) + /// Returns `true` if the given node id is present in `members`. 
+ pub(crate) fn contains_member(&self, node_id: u64) -> bool { + self.members.iter().any(|s| s.contains(&node_id)) } } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 5912917bd..30d2c57d4 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -871,7 +871,7 @@ impl RawCurp { let st_r = self.st.read(); let log_r = self.log.read(); - let contains_candidate = self.ms.map_read(|ms| ms.check_membership(candidate_id)); + let contains_candidate = self.ms.map_read(|ms| ms.is_member(candidate_id)); // extra check to shutdown removed node if !contains_candidate { debug!( @@ -1095,7 +1095,7 @@ impl RawCurp { if st_r.role != Role::Leader { return Err(CurpError::redirect(st_r.leader_id, st_r.term)); } - if !self.ms.map_read(|ms| ms.check_membership(target_id)) { + if !self.ms.map_read(|ms| ms.is_member(target_id)) { return Err(CurpError::LeaderTransfer( "target node does not exist or it is a learner".to_owned(), )); From e26f850b6ddf1b4990c651083a460c0fe0efedad Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 20 Sep 2024 16:52:59 +0800 Subject: [PATCH 196/322] test: rewrite membership change tests in `raw_curp` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/mod.rs | 6 + crates/curp/src/server/raw_curp/tests.rs | 285 ++++++++++------------- 2 files changed, 128 insertions(+), 163 deletions(-) diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 30d2c57d4..7676b0842 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1193,6 +1193,12 @@ impl RawCurp { self.ms.read().cluster().effective().clone() } + /// Get the committed membership + #[cfg(test)] + pub(super) fn committed_membership(&self) -> Membership { + self.ms.read().cluster().committed().clone() + } + /// Get `append_entries` request for 
`follower_id` that contains the latest /// log entries pub(super) fn sync(&self, follower_id: ServerId) -> Option> { diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index f2bfbcfee..afbadd1f8 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -9,7 +9,8 @@ use utils::config::{ use super::*; use crate::{ - rpc::Redirect, + member::Change, + rpc::{self, NodeMetadata, Redirect}, server::{ cmd_board::CommandBoard, conflict::test_pools::{TestSpecPool, TestUncomPool}, @@ -61,6 +62,17 @@ impl RawCurp { std::mem::forget(as_rx); let resp_txs = Arc::new(Mutex::default()); let id_barrier = Arc::new(IdBarrier::new()); + let init_members = (0..n) + .map(|id| (id, NodeMetadata::new(format!("S{id}"), ["addr"], ["addr"]))) + .collect(); + let membership_info = MembershipInfo::new(0, init_members); + let peer_addrs: HashMap<_, _> = membership_info + .init_members + .clone() + .into_iter() + .map(|(id, meta)| (id, meta.into_peer_urls())) + .collect(); + let member_connects = rpc::inner_connects(peer_addrs, None).collect(); Self::builder() .is_leader(true) @@ -75,6 +87,8 @@ impl RawCurp { .as_tx(as_tx) .resp_txs(resp_txs) .id_barrier(id_barrier) + .membership_info(membership_info) + .member_connects(member_connects) .build_raw_curp() .unwrap() } @@ -658,204 +672,142 @@ fn is_synced_should_return_true_when_followers_caught_up_with_leader() { assert!(curp.is_synced(s2_id)); } -#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] -#[test] -fn add_node_should_add_new_node_to_curp() { +#[tokio::test] // TODO: use sync context +async fn add_node_should_add_new_node_to_curp() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; - let old_cluster = curp.cluster().clone(); - let changes = vec![ConfChange::add(1, vec!["http://127.0.0.1:4567".to_owned()])]; - 
assert!(curp.check_new_config(&changes).is_ok()); - let infos = curp.apply_conf_change(changes.clone()).unwrap(); - assert!(curp.contains(1)); - curp.fallback_conf_change(changes, infos.0, infos.1, infos.2); - let cluster_after_fallback = curp.cluster(); - assert_eq!( - old_cluster.cluster_id(), - cluster_after_fallback.cluster_id() - ); - assert_eq!(old_cluster.self_id(), cluster_after_fallback.self_id()); - assert_eq!( - old_cluster.all_members(), - cluster_after_fallback.all_members() - ); - assert_eq!( - cluster_after_fallback.cluster_version(), - old_cluster.cluster_version() - ); + let original_membership = Membership::new(vec![(0..3).collect()], BTreeMap::default()); + let membership = Membership::new(vec![(0..4).collect()], BTreeMap::default()); + let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); + assert!(curp + .effective_membership() + .members + .iter() + .flatten() + .any(|id| *id == 3)); + let _ignore = curp + .update_membership(original_membership, |_, _, _| {}) + .unwrap(); + assert!(!curp + .effective_membership() + .members + .iter() + .flatten() + .any(|id| *id == 3)); } -#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] -#[test] -fn add_learner_node_and_promote_should_success() { +#[tokio::test] // TODO: use sync context +async fn add_learner_node_and_promote_should_success() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; - let changes = vec![ConfChange::add_learner( - 1, - vec!["http://127.0.0.1:4567".to_owned()], - )]; - assert!(curp.check_new_config(&changes).is_ok()); - curp.apply_conf_change(changes); - assert!(curp.check_learner(1, true)); - - let changes = vec![ConfChange::promote(1)]; - assert!(curp.check_new_config(&changes).is_ok()); - let infos = curp.apply_conf_change(changes.clone()).unwrap(); - assert!(curp.check_learner(1, false)); - curp.fallback_conf_change(changes, infos.0, infos.1, infos.2); - 
assert!(curp.check_learner(1, true)); + let membership = curp + .generate_membership(Change::AddLearner(vec![(3, NodeMetadata::default())])) + .pop() + .unwrap(); + let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); + assert!(!curp + .effective_membership() + .members + .iter() + .flatten() + .any(|id| *id == 3)); + curp.membership_commit_to(1); + let membership = curp + .generate_membership(Change::AddMember(vec![3])) + .pop() + .unwrap(); + let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); + assert!(curp + .effective_membership() + .members + .iter() + .flatten() + .any(|id| *id == 3)); } -#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] #[test] -fn add_exists_node_should_return_node_already_exists_error() { +fn add_exists_node_should_have_no_effect() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let exists_node_id = curp.get_id_by_name("S1").unwrap(); - let changes = vec![ConfChange::add( - exists_node_id, - vec!["http://127.0.0.1:4567".to_owned()], - )]; - let resp = curp.check_new_config(&changes); - let error_match = matches!(resp, Err(CurpError::NodeAlreadyExists(()))); - assert!(error_match); + assert!(curp + .generate_membership(Change::AddLearner(vec![( + exists_node_id, + NodeMetadata::default(), + )])) + .is_empty()); + assert!(curp + .generate_membership(Change::AddMember(vec![exists_node_id])) + .is_empty()); } -#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] -#[test] -fn remove_node_should_remove_node_from_curp() { +#[tokio::test] // TODO: use sync context +async fn remove_node_should_remove_node_from_curp() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; - let old_cluster = curp.cluster().clone(); let follower_id = curp.get_id_by_name("S1").unwrap(); - let changes = 
vec![ConfChange::remove(follower_id)]; - assert!(curp.check_new_config(&changes).is_ok()); - let infos = curp.apply_conf_change(changes.clone()).unwrap(); - assert_eq!(infos, (vec!["S1".to_owned()], "S1".to_owned(), false)); - assert!(!curp.contains(follower_id)); - curp.fallback_conf_change(changes, infos.0, infos.1, infos.2); - let cluster_after_fallback = curp.cluster(); - assert_eq!( - old_cluster.cluster_id(), - cluster_after_fallback.cluster_id() - ); - assert_eq!(old_cluster.self_id(), cluster_after_fallback.self_id()); - assert_eq!( - old_cluster.all_members(), - cluster_after_fallback.all_members() - ); + let membership = curp + .generate_membership(Change::RemoveMember(vec![follower_id])) + .pop() + .unwrap(); + let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); + assert!(!curp + .effective_membership() + .members + .iter() + .flatten() + .any(|id| *id == follower_id)); + assert!(curp.effective_membership().nodes.contains_key(&follower_id)); } -#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] #[test] -fn remove_non_exists_node_should_return_node_not_exists_error() { +fn remove_non_exists_node_should_have_no_effect() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; - let changes = vec![ConfChange::remove(1)]; - let resp = curp.check_new_config(&changes); - assert!(matches!(resp, Err(CurpError::NodeNotExists(())))); -} - -#[cfg(ignore)] // TODO: rewrite config change tests -#[traced_test] -#[test] -fn update_node_should_update_the_address_of_node() { - let task_manager = Arc::new(TaskManager::new()); - let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; - let old_cluster = curp.cluster().clone(); - let follower_id = curp.get_id_by_name("S1").unwrap(); - let mut mock_connect = MockInnerConnectApi::new(); - mock_connect.expect_update_addrs().returning(|_| Ok(())); - curp.set_connect( - follower_id, - 
InnerConnectApiWrapper::new_from_arc(Arc::new(mock_connect)), - ); - assert_eq!( - curp.cluster().peer_urls(follower_id), - Some(vec!["S1".to_owned()]) - ); - let changes = vec![ConfChange::update( - follower_id, - vec!["http://127.0.0.1:4567".to_owned()], - )]; - assert!(curp.check_new_config(&changes).is_ok()); - let infos = curp.apply_conf_change(changes.clone()).unwrap(); - assert_eq!(infos, (vec!["S1".to_owned()], String::new(), false)); - assert_eq!( - curp.cluster().peer_urls(follower_id), - Some(vec!["http://127.0.0.1:4567".to_owned()]) - ); - curp.fallback_conf_change(changes, infos.0, infos.1, infos.2); - let cluster_after_fallback = curp.cluster(); - assert_eq!( - old_cluster.cluster_id(), - cluster_after_fallback.cluster_id() - ); - assert_eq!(old_cluster.self_id(), cluster_after_fallback.self_id()); - assert_eq!( - old_cluster.all_members(), - cluster_after_fallback.all_members() - ); + assert!(curp + .generate_membership(Change::RemoveLearner(vec![10])) + .is_empty()); + assert!(curp + .generate_membership(Change::RemoveMember(vec![10])) + .is_empty()); } -#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] -#[test] -fn leader_handle_propose_conf_change() { +#[tokio::test] // TODO: use sync context +async fn follower_append_membership_change() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; - let follower_id = curp.get_id_by_name("S1").unwrap(); - assert_eq!( - curp.cluster().peer_urls(follower_id), - Some(vec!["S1".to_owned()]) - ); - let changes = vec![ConfChange::update( - follower_id, - vec!["http://127.0.0.1:4567".to_owned()], - )]; - curp.handle_propose_conf_change(ProposeId(TEST_CLIENT_ID, 0), changes) + let membership = curp + .generate_membership(Change::AddLearner(vec![(3, NodeMetadata::default())])) + .pop() .unwrap(); -} -#[cfg(ignore)] // TODO: rewrite config change tests -#[traced_test] -#[test] -fn follower_handle_propose_conf_change() { - 
let task_manager = Arc::new(TaskManager::new()); - let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; curp.update_to_term_and_become_follower(&mut *curp.st.write(), 2); - - let follower_id = curp.get_id_by_name("S1").unwrap(); - assert_eq!( - curp.cluster().peer_urls(follower_id), - Some(vec!["S1".to_owned()]) - ); - let changes = vec![ConfChange::update( - follower_id, - vec!["http://127.0.0.1:4567".to_owned()], - )]; - let result = curp.handle_propose_conf_change(ProposeId(TEST_CLIENT_ID, 0), changes); - assert!(matches!( - result, - Err(CurpError::Redirect(Redirect { - leader_id: None, - term: 2, - })) - )); + let log = LogEntry::new(1, 1, ProposeId::default(), membership.clone()); + let _ignore = curp.append_membership([log], 1, 0, |_, _, _| {}).unwrap(); + assert_eq!(curp.effective_membership(), membership); + assert_ne!(curp.committed_membership(), membership); + let log1 = LogEntry::new(2, 1, ProposeId::default(), EntryData::::Empty); + let _ignore = curp.append_membership([log1], 1, 1, |_, _, _| {}).unwrap(); + assert_eq!(curp.effective_membership(), membership); + assert_eq!(curp.committed_membership(), membership); } -#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] -#[test] -fn leader_handle_move_leader() { +#[tokio::test] // TODO: use sync context +async fn leader_handle_move_leader() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; - curp.switch_config(ConfChange::add_learner(1234, vec!["address".to_owned()])); + let membership = curp + .generate_membership(Change::AddLearner(vec![(1234, NodeMetadata::default())])) + .pop() + .unwrap(); + let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); let res = curp.handle_move_leader(1234); assert!(res.is_err()); @@ -864,6 +816,9 @@ fn leader_handle_move_leader() { assert!(res.is_err()); let target_id = curp.get_id_by_name("S1").unwrap(); + let _ignore = curp + 
.handle_append_entries_resp(target_id, Some(1), 1, true, 1) + .unwrap(); let res = curp.handle_move_leader(target_id); // need to send try become leader now after handle_move_leader assert!(res.is_ok_and(|b| b)); @@ -885,10 +840,9 @@ fn follower_handle_move_leader() { assert!(matches!(res, Err(CurpError::Redirect(_)))); } -#[cfg(ignore)] // TODO: rewrite config change tests #[traced_test] -#[test] -fn leader_will_reset_transferee_after_remove_node() { +#[tokio::test] +async fn leader_will_reset_transferee_after_remove_node() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; @@ -897,7 +851,12 @@ fn leader_will_reset_transferee_after_remove_node() { assert!(res.is_ok_and(|b| b)); assert_eq!(curp.get_transferee(), Some(target_id)); - curp.switch_config(ConfChange::remove(target_id)); + let membership = Membership::new( + vec![(0..5).filter(|id| *id != target_id).collect()], + BTreeMap::default(), + ); + let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); + curp.update_transferee(); assert!(curp.get_transferee().is_none()); } From 148ab143a87707b7417ec760398fb67348bcde51 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 20 Sep 2024 18:09:47 +0800 Subject: [PATCH 197/322] refactor: implement `ClusterServer` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/Cargo.toml | 1 + crates/xline/src/server/cluster_server.rs | 96 ++++++++++++++++++++--- 2 files changed, 87 insertions(+), 10 deletions(-) diff --git a/crates/xline/Cargo.toml b/crates/xline/Cargo.toml index 83d15d899..4405f4675 100644 --- a/crates/xline/Cargo.toml +++ b/crates/xline/Cargo.toml @@ -52,6 +52,7 @@ pbkdf2 = { version = "0.12.2", features = ["simple"] } priority-queue = "2.0.2" prometheus = "0.13.4" prost = "0.13.3" +rand = "0.8.5" real_tokio = { version = "1", package = "tokio" } serde = { version = "1.0.204", features = ["derive"] } 
sha2 = "0.10.6" diff --git a/crates/xline/src/server/cluster_server.rs b/crates/xline/src/server/cluster_server.rs index 7097b3485..e97bd2b3a 100644 --- a/crates/xline/src/server/cluster_server.rs +++ b/crates/xline/src/server/cluster_server.rs @@ -1,11 +1,10 @@ -// FIXME: implement cluster server -#![allow(unused, clippy::unimplemented)] - -use std::sync::Arc; +use std::{collections::BTreeSet, sync::Arc}; +use curp::rpc::{CurpError, NodeMetadata}; +use rand::Rng; use tonic::{Request, Response, Status}; use xlineapi::{ - command::CurpClient, Cluster, MemberAddRequest, MemberAddResponse, MemberListRequest, + command::CurpClient, Cluster, Member, MemberAddRequest, MemberAddResponse, MemberListRequest, MemberListResponse, MemberPromoteRequest, MemberPromoteResponse, MemberRemoveRequest, MemberRemoveResponse, MemberUpdateRequest, MemberUpdateResponse, }; @@ -25,6 +24,33 @@ impl ClusterServer { pub(crate) fn new(client: Arc, header_gen: Arc) -> Self { Self { client, header_gen } } + + /// Fetch members + async fn fetch_members(&self, linearizable: bool) -> Result, Status> { + let resp = self.client.fetch_cluster(linearizable).await?; + let member_ids: BTreeSet<_> = resp.members.into_iter().flat_map(|q| q.set).collect(); + Ok(resp + .nodes + .into_iter() + .map(|n| { + let (id, meta) = n.into_parts(); + Member { + id, + name: meta.name, + peer_ur_ls: meta.peer_urls, + client_ur_ls: meta.client_urls, + is_learner: !member_ids.contains(&id), + } + }) + .collect()) + } + + /// Generate a random node name + fn gen_rand_node_name() -> String { + let mut rng = rand::thread_rng(); + let suffix_num: u32 = rng.gen(); + format!("xline_{suffix_num:08x}") + } } #[tonic::async_trait] @@ -33,19 +59,54 @@ impl Cluster for ClusterServer { &self, request: Request, ) -> Result, Status> { - unimplemented!() + let header = self.header_gen.gen_header(); + let request = request.into_inner(); + let name = Self::gen_rand_node_name(); + let node = NodeMetadata::new(name, request.peer_ur_ls, 
vec![]); + let ids = self.client.add_learner(vec![node]).await?; + let id = ids + .into_iter() + .next() + .ok_or(tonic::Status::internal("invalid member added"))?; + + if !request.is_learner { + self.client.add_member(vec![id]).await?; + } + let members = self.fetch_members(true).await?; + let added = members + .iter() + .find(|m| m.id == id) + .ok_or(tonic::Status::internal("added member not found"))? + .clone(); + + Ok(tonic::Response::new(MemberAddResponse { + header: Some(header), + member: Some(added), + members, + })) } async fn member_remove( &self, request: Request, ) -> Result, Status> { - unimplemented!() + let header = self.header_gen.gen_header(); + let id = request.into_inner().id; + // In etcd a member could be a learner, and could return CurpError::InvalidMemberChange + // TODO: handle other errors that may returned + let _ignore = self.client.remove_member(vec![id]).await; + self.client.remove_learner(vec![id]).await?; + let members = self.fetch_members(true).await?; + + Ok(tonic::Response::new(MemberRemoveResponse { + header: Some(header), + members, + })) } async fn member_update( &self, - request: Request, + _request: Request, ) -> Result, Status> { unimplemented!() } @@ -54,13 +115,28 @@ impl Cluster for ClusterServer { &self, request: Request, ) -> Result, Status> { - unimplemented!() + let header = self.header_gen.gen_header(); + let members = self + .fetch_members(request.into_inner().linearizable) + .await?; + Ok(tonic::Response::new(MemberListResponse { + header: Some(header), + members, + })) } async fn member_promote( &self, request: Request, ) -> Result, Status> { - unimplemented!() + let header = self.header_gen.gen_header(); + self.client + .add_member(vec![request.into_inner().id]) + .await?; + let members = self.fetch_members(true).await?; + Ok(tonic::Response::new(MemberPromoteResponse { + header: Some(header), + members, + })) } } From 0cc4b907f3d549475cea6f76a28d2cdffbaee9db Mon Sep 17 00:00:00 2001 From: bsbds 
<69835502+bsbds@users.noreply.github.com> Date: Mon, 23 Sep 2024 10:25:59 +0800 Subject: [PATCH 198/322] refactor: move node id speficication to the client side Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/common | 2 +- crates/curp/src/client/connect.rs | 10 ++-- crates/curp/src/client/retry.rs | 4 +- crates/curp/src/client/unary/mod.rs | 14 ++--- crates/curp/src/rpc/mod.rs | 10 ++++ .../curp/src/server/curp_node/member_impl.rs | 10 ++-- .../curp/src/server/raw_curp/member_impl.rs | 9 ---- crates/curp/tests/it/server.rs | 54 ++++++++++++------- crates/xline-client/examples/member.rs | 10 ++-- crates/xline-client/src/clients/member.rs | 15 ++++-- crates/xline-client/tests/it/member.rs | 11 ++-- crates/xline/src/server/cluster_server.rs | 18 ++++--- 12 files changed, 89 insertions(+), 78 deletions(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index c38721ded..b211a970d 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit c38721deddae771fb13b558b603182b40528f68d +Subproject commit b211a970d8a3ecdf90c2b25ca30a05673b5b484a diff --git a/crates/curp/src/client/connect.rs b/crates/curp/src/client/connect.rs index a54e16388..1a74eb1eb 100644 --- a/crates/curp/src/client/connect.rs +++ b/crates/curp/src/client/connect.rs @@ -3,7 +3,7 @@ use curp_external_api::cmd::Command; use crate::{ members::ServerId, - rpc::{FetchMembershipResponse, NodeMetadata, ReadState}, + rpc::{FetchMembershipResponse, Node, ReadState}, }; use super::retry::Context; @@ -60,7 +60,7 @@ pub trait ClientApi { } /// Add some learners to the cluster. - async fn add_learner(&self, nodes: Vec) -> Result, Self::Error>; + async fn add_learner(&self, nodes: Vec) -> Result<(), Self::Error>; /// Remove some learners from the cluster. 
async fn remove_learner(&self, ids: Vec) -> Result<(), Self::Error>; @@ -105,11 +105,7 @@ pub(crate) trait RepeatableClientApi { ) -> Result; /// Add some learners to the cluster. - async fn add_learner( - &self, - nodes: Vec, - ctx: Context, - ) -> Result, Self::Error>; + async fn add_learner(&self, nodes: Vec, ctx: Context) -> Result<(), Self::Error>; /// Remove some learners from the cluster. async fn remove_learner(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error>; diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 5783a3805..aa609ee23 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -19,7 +19,7 @@ use super::{ }; use crate::{ members::ServerId, - rpc::{CurpError, ReadState, Redirect, ProposeId, FetchMembershipResponse, NodeMetadata}, tracker::Tracker, + rpc::{CurpError, ReadState, Redirect, ProposeId, FetchMembershipResponse, NodeMetadata, Node}, tracker::Tracker, }; /// Backoff config @@ -434,7 +434,7 @@ where } /// Add some learners to the cluster. 
- async fn add_learner(&self, nodes: Vec) -> Result, Self::Error> { + async fn add_learner(&self, nodes: Vec) -> Result<(), Self::Error> { self.retry::<_, _>(|client, ctx| client.add_learner(nodes.clone(), ctx)) .await } diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 224396a24..5a17be986 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -13,8 +13,8 @@ use super::{ retry::Context, }; use crate::rpc::{ - AddLearnerRequest, AddMemberRequest, CurpError, FetchReadStateRequest, MoveLeaderRequest, - NodeMetadata, ReadState, RemoveLearnerRequest, RemoveMemberRequest, ShutdownRequest, + AddLearnerRequest, AddMemberRequest, CurpError, FetchReadStateRequest, MoveLeaderRequest, Node, + ReadState, RemoveLearnerRequest, RemoveMemberRequest, ShutdownRequest, }; /// The unary client @@ -110,19 +110,15 @@ impl RepeatableClientApi for Unary { } /// Add some learners to the cluster. - async fn add_learner( - &self, - nodes: Vec, - ctx: Context, - ) -> Result, Self::Error> { + async fn add_learner(&self, nodes: Vec, ctx: Context) -> Result<(), Self::Error> { let req = AddLearnerRequest { nodes }; let timeout = self.config.wait_synced_timeout(); - let resp = ctx + let _ignore = ctx .cluster_state() .map_leader(|conn| async move { conn.add_learner(req, timeout).await }) .await?; - Ok(resp.into_inner().node_ids) + Ok(()) } /// Remove some learners from the cluster. 
diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index a581be62f..edc20033f 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -810,6 +810,16 @@ impl FetchMembershipResponse { } impl Node { + /// Creates a new `Node` + #[inline] + #[must_use] + pub fn new(node_id: u64, meta: NodeMetadata) -> Self { + Self { + node_id, + meta: Some(meta), + } + } + /// Unwraps self #[allow(clippy::unwrap_used, clippy::missing_panics_doc)] // convert rpc types #[inline] diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 73724dcf5..3cd731f89 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -17,6 +17,7 @@ use crate::rpc::AddLearnerResponse; use crate::rpc::AddMemberRequest; use crate::rpc::AddMemberResponse; use crate::rpc::CurpError; +use crate::rpc::Node; use crate::rpc::Redirect; use crate::rpc::RemoveLearnerRequest; use crate::rpc::RemoveLearnerResponse; @@ -32,13 +33,10 @@ impl, RC: RoleChange> CurpNode { request: AddLearnerRequest, ) -> Result { self.ensure_leader()?; - let node_ids = self.curp.new_node_ids(request.nodes.len()); - let ids_with_meta = node_ids.clone().into_iter().zip(request.nodes).collect(); + let nodes = request.nodes.into_iter().map(Node::into_parts).collect(); + self.update_and_wait(Change::AddLearner(nodes)).await?; - self.update_and_wait(Change::AddLearner(ids_with_meta)) - .await?; - - Ok(AddLearnerResponse { node_ids }) + Ok(AddLearnerResponse {}) } /// Removes a learner from the cluster diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index bbae200ff..c0040621a 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -6,7 +6,6 @@ use curp_external_api::cmd::Command; use curp_external_api::role_change::RoleChange; use curp_external_api::LogIndex; use 
event_listener::Event; -use rand::Rng; use utils::parking_lot_lock::RwLockMap; use crate::log_entry::EntryData; @@ -24,14 +23,6 @@ use super::RawCurp; use super::Role; impl RawCurp { - /// Generates new node ids - /// TODO: makes sure that the ids are unique - #[allow(clippy::unused_self)] // it should be used after the previous TODO - pub(crate) fn new_node_ids(&self, n: usize) -> Vec { - let mut rng = rand::thread_rng(); - (0..n).map(|_| rng.gen()).collect() - } - /// Generate memberships based on the provided change pub(crate) fn generate_membership(&self, change: Change) -> Vec { let ms_r = self.ms.read(); diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index f2529f71e..56fe2d4ef 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -9,7 +9,7 @@ use clippy_utilities::NumericCast; use curp::{ client::{ClientApi, ClientBuilder}, member::MembershipInfo, - rpc::{CurpError, NodeMetadata}, + rpc::{CurpError, Node, NodeMetadata}, }; use curp_test_utils::{ init_logger, sleep_millis, @@ -394,8 +394,10 @@ async fn propose_add_node_should_success() { let client = group.new_client().await; let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); - let id = client.add_learner(vec![node_meta.clone()]).await.unwrap()[0]; - assert_cluster(&client, 4, 3, [NodeAssert::new(id, node_meta, false)]).await; + let node_id = 5; + let node = Node::new(node_id, node_meta.clone()); + client.add_learner(vec![node]).await.unwrap(); + assert_cluster(&client, 4, 3, [NodeAssert::new(node_id, node_meta, false)]).await; } #[tokio::test(flavor = "multi_thread")] @@ -407,10 +409,12 @@ async fn propose_remove_node_should_success() { let client = group.new_client().await; let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); - let id = client.add_learner(vec![node_meta.clone()]).await.unwrap()[0]; - assert_cluster(&client, 4, 3, [NodeAssert::new(id, node_meta, false)]).await; + let node_id = 5; + let node = 
Node::new(node_id, node_meta.clone()); + client.add_learner(vec![node]).await.unwrap(); + assert_cluster(&client, 4, 3, [NodeAssert::new(node_id, node_meta, false)]).await; - client.remove_learner(vec![id]).await.unwrap(); + client.remove_learner(vec![node_id]).await.unwrap(); assert_cluster(&client, 3, 3, []).await; } @@ -423,10 +427,12 @@ async fn propose_add_member_should_success() { let client = group.new_client().await; let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); - let id = client.add_learner(vec![node_meta.clone()]).await.unwrap()[0]; - assert_cluster(&client, 4, 3, [NodeAssert::new(id, node_meta, false)]).await; + let node_id = 5; + let node = Node::new(node_id, node_meta.clone()); + client.add_learner(vec![node]).await.unwrap(); + assert_cluster(&client, 4, 3, [NodeAssert::new(node_id, node_meta, false)]).await; - client.add_member(vec![id]).await.unwrap(); + client.add_member(vec![node_id]).await.unwrap(); assert_cluster(&client, 4, 4, []).await; } @@ -439,16 +445,18 @@ async fn propose_remove_member_should_success() { let client = group.new_client().await; let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); - let id = client.add_learner(vec![node_meta.clone()]).await.unwrap()[0]; - assert_cluster(&client, 4, 3, [NodeAssert::new(id, node_meta, false)]).await; + let node_id = 5; + let node = Node::new(node_id, node_meta.clone()); + client.add_learner(vec![node]).await.unwrap(); + assert_cluster(&client, 4, 3, [NodeAssert::new(node_id, node_meta, false)]).await; - client.add_member(vec![id]).await.unwrap(); + client.add_member(vec![node_id]).await.unwrap(); assert_cluster(&client, 4, 4, []).await; - client.remove_member(vec![id]).await.unwrap(); + client.remove_member(vec![node_id]).await.unwrap(); assert_cluster(&client, 4, 3, []).await; - client.remove_learner(vec![id]).await.unwrap(); + client.remove_learner(vec![node_id]).await.unwrap(); assert_cluster(&client, 3, 3, []).await; } @@ -509,8 +517,10 @@ async fn 
propose_conf_change_to_follower() { .unwrap(); let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); - let id = client.add_learner(vec![node_meta.clone()]).await.unwrap()[0]; - assert_cluster(&client, 4, 3, [NodeAssert::new(id, node_meta, false)]).await; + let node_id = 5; + let node = Node::new(node_id, node_meta.clone()); + client.add_learner(vec![node]).await.unwrap(); + assert_cluster(&client, 4, 3, [NodeAssert::new(node_id, node_meta, false)]).await; } #[tokio::test(flavor = "multi_thread")] @@ -527,7 +537,9 @@ async fn new_node_should_apply_old_cluster_logs() { let addr = listener.local_addr().unwrap().to_string(); let addrs = vec![addr.clone()]; let node_meta = NodeMetadata::new("new_node", addrs.clone(), addrs); - let node_id = client.add_learner(vec![node_meta]).await.unwrap()[0]; + let node_id = 5; + let node = Node::new(node_id, node_meta.clone()); + client.add_learner(vec![node]).await.unwrap(); /******* start new node *******/ @@ -592,7 +604,9 @@ async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_cluster( let addrs = vec![listener.local_addr().unwrap().to_string()]; let node_meta = NodeMetadata::new("new_node", addrs.clone(), addrs); - let node_id = client.add_learner(vec![node_meta]).await.unwrap()[0]; + let node_id = 5; + let node = Node::new(node_id, node_meta); + client.add_learner(vec![node]).await.unwrap(); group .run_node( listener, @@ -619,7 +633,9 @@ async fn propose_conf_change_rpc_should_work_when_client_has_wrong_cluster() { let listener = TcpListener::bind("0.0.0.0:0").await.unwrap(); let addrs = vec![listener.local_addr().unwrap().to_string()]; let node_meta = NodeMetadata::new("new_node", addrs.clone(), addrs); - let node_id = client.add_learner(vec![node_meta]).await.unwrap()[0]; + let node_id = 5; + let node = Node::new(node_id, node_meta); + client.add_learner(vec![node]).await.unwrap(); group .run_node( listener, diff --git a/crates/xline-client/examples/member.rs 
b/crates/xline-client/examples/member.rs index ca348cd1d..912b5787b 100644 --- a/crates/xline-client/examples/member.rs +++ b/crates/xline-client/examples/member.rs @@ -10,14 +10,12 @@ async fn main() -> Result<()> { .await? .member_client(); - let node1 = Node::new("n1", vec!["10.0.0.4:2380"], vec!["10.0.0.4.2379"]); - let node2 = Node::new("n2", vec!["10.0.0.5:2380"], vec!["10.0.0.5.2379"]); - let ids = client.add_learner(vec![node1, node2]).await?; - - println!("got node ids of new learners: {ids:?}"); + let node1 = Node::new(1, "n1", vec!["10.0.0.4:2380"], vec!["10.0.0.4.2379"]); + let node2 = Node::new(2, "n2", vec!["10.0.0.5:2380"], vec!["10.0.0.5.2379"]); + client.add_learner(vec![node1, node2]).await?; // Remove the previously added learners - client.remove_learner(ids).await?; + client.remove_learner(vec![1, 2]).await?; Ok(()) } diff --git a/crates/xline-client/src/clients/member.rs b/crates/xline-client/src/clients/member.rs index 7a69619c1..030e7a25c 100644 --- a/crates/xline-client/src/clients/member.rs +++ b/crates/xline-client/src/clients/member.rs @@ -49,7 +49,7 @@ impl MemberClient { /// } /// ``` #[inline] - pub async fn add_learner(&self, nodes: Vec) -> Result> { + pub async fn add_learner(&self, nodes: Vec) -> Result<()> { self.curp_client .add_learner(nodes.into_iter().map(Into::into).collect()) .await @@ -102,6 +102,8 @@ impl std::fmt::Debug for MemberClient { #[derive(Clone, Debug)] #[non_exhaustive] pub struct Node { + /// The id of the node + pub node_id: u64, /// Name of the node. pub name: String, /// List of URLs used for peer-to-peer communication. 
@@ -114,13 +116,14 @@ impl Node { /// Creates a new `Node` #[inline] #[must_use] - pub fn new(name: N, peer_urls: AS, client_urls: AS) -> Self + pub fn new(id: u64, name: N, peer_urls: AS, client_urls: AS) -> Self where N: AsRef, A: AsRef, AS: IntoIterator, { Self { + node_id: id, name: name.as_ref().to_owned(), peer_urls: peer_urls .into_iter() @@ -134,13 +137,17 @@ impl Node { } } -impl From for curp::rpc::NodeMetadata { +impl From for curp::rpc::Node { #[inline] fn from(node: Node) -> Self { - curp::rpc::NodeMetadata { + let meta = curp::rpc::NodeMetadata { name: node.name, peer_urls: node.peer_urls, client_urls: node.client_urls, + }; + Self { + node_id: node.node_id, + meta: Some(meta), } } } diff --git a/crates/xline-client/tests/it/member.rs b/crates/xline-client/tests/it/member.rs index b1c9c8f56..b9e54c896 100644 --- a/crates/xline-client/tests/it/member.rs +++ b/crates/xline-client/tests/it/member.rs @@ -9,19 +9,16 @@ async fn learner_add_and_remove_are_ok() -> Result<()> { let (_cluster, client) = get_cluster_client().await.unwrap(); let client = client.member_client(); - let node1 = Node::new("n1", vec!["10.0.0.4:2380"], vec!["10.0.0.4.2379"]); - let node2 = Node::new("n2", vec!["10.0.0.5:2380"], vec!["10.0.0.5.2379"]); - let ids = client + let node1 = Node::new(1, "n1", vec!["10.0.0.4:2380"], vec!["10.0.0.4.2379"]); + let node2 = Node::new(2, "n2", vec!["10.0.0.5:2380"], vec!["10.0.0.5.2379"]); + client .add_learner(vec![node1, node2]) .await .expect("failed to add learners"); - let added = ids.len(); - assert_eq!(added, 2, "expected 2 learners to be added, got {added}"); - // Remove the previously added learners client - .remove_learner(ids) + .remove_learner(vec![1, 2]) .await .expect("failed to remove learners"); diff --git a/crates/xline/src/server/cluster_server.rs b/crates/xline/src/server/cluster_server.rs index e97bd2b3a..6604990f5 100644 --- a/crates/xline/src/server/cluster_server.rs +++ b/crates/xline/src/server/cluster_server.rs @@ -1,6 
+1,6 @@ use std::{collections::BTreeSet, sync::Arc}; -use curp::rpc::{CurpError, NodeMetadata}; +use curp::rpc::{Node, NodeMetadata}; use rand::Rng; use tonic::{Request, Response, Status}; use xlineapi::{ @@ -51,6 +51,11 @@ impl ClusterServer { let suffix_num: u32 = rng.gen(); format!("xline_{suffix_num:08x}") } + + /// Generates a random node ID. + fn gen_rand_node_id() -> u64 { + rand::thread_rng().gen() + } } #[tonic::async_trait] @@ -62,13 +67,10 @@ impl Cluster for ClusterServer { let header = self.header_gen.gen_header(); let request = request.into_inner(); let name = Self::gen_rand_node_name(); - let node = NodeMetadata::new(name, request.peer_ur_ls, vec![]); - let ids = self.client.add_learner(vec![node]).await?; - let id = ids - .into_iter() - .next() - .ok_or(tonic::Status::internal("invalid member added"))?; - + let id = Self::gen_rand_node_id(); + let meta = NodeMetadata::new(name, request.peer_ur_ls, vec![]); + let node = Node::new(id, meta); + self.client.add_learner(vec![node]).await?; if !request.is_learner { self.client.add_member(vec![id]).await?; } From 32fa89922159fbace6d48483cf3ca2f428bc6c3e Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:57:14 +0800 Subject: [PATCH 199/322] refactor: implement member update in `ClusterServer` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/src/server/cluster_server.rs | 32 +++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/crates/xline/src/server/cluster_server.rs b/crates/xline/src/server/cluster_server.rs index 6604990f5..f4e91ac35 100644 --- a/crates/xline/src/server/cluster_server.rs +++ b/crates/xline/src/server/cluster_server.rs @@ -108,9 +108,37 @@ impl Cluster for ClusterServer { async fn member_update( &self, - _request: Request, + request: Request, ) -> Result, Status> { - unimplemented!() + let header = self.header_gen.gen_header(); + let request = request.into_inner(); + let 
id = request.id; + let mut members = self.fetch_members(true).await?; + let member = members + .iter_mut() + .find(|m| m.id == id) + .ok_or(tonic::Status::internal("invalid member id"))?; + + if !member.is_learner { + self.client.remove_member(vec![id]).await?; + } + self.client.remove_learner(vec![id]).await?; + + let meta = NodeMetadata::new( + member.name.clone(), + request.peer_ur_ls.clone(), + member.client_ur_ls.clone(), + ); + let node = Node::new(id, meta); + self.client.add_learner(vec![node]).await?; + self.client.add_member(vec![id]).await?; + + member.peer_ur_ls = request.peer_ur_ls; + + Ok(tonic::Response::new(MemberUpdateResponse { + header: Some(header), + members, + })) } async fn member_list( From ac28e9589cbbe19928b9c42908aaef82abda4b0b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 24 Sep 2024 09:47:27 +0800 Subject: [PATCH 200/322] refactor: merge membership change rpcs into one Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/common | 2 +- crates/curp/src/client/connect.rs | 32 ++--- crates/curp/src/client/keep_alive.rs | 43 +----- crates/curp/src/client/retry.rs | 24 +--- crates/curp/src/client/unary/mod.rs | 55 ++------ crates/curp/src/member.rs | 103 +++++--------- crates/curp/src/rpc/connect/mod.rs | 127 +++--------------- crates/curp/src/rpc/mod.rs | 21 +-- crates/curp/src/rpc/reconnect.rs | 47 ++----- .../curp/src/server/curp_node/member_impl.rs | 88 +++++------- crates/curp/src/server/mod.rs | 52 +------ .../curp/src/server/raw_curp/member_impl.rs | 9 +- crates/curp/src/server/raw_curp/tests.rs | 23 ++-- crates/curp/tests/it/server.rs | 82 ++++++++--- crates/xline-client/src/clients/member.rs | 37 ++++- crates/xline/src/server/auth_wrapper.rs | 40 ++---- crates/xline/src/server/cluster_server.rs | 37 +++-- 17 files changed, 303 insertions(+), 519 deletions(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index b211a970d..81707530d 160000 --- 
a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit b211a970d8a3ecdf90c2b25ca30a05673b5b484a +Subproject commit 81707530d471e77c0e57187d11ee0f0874a73177 diff --git a/crates/curp/src/client/connect.rs b/crates/curp/src/client/connect.rs index 1a74eb1eb..aa3748a5a 100644 --- a/crates/curp/src/client/connect.rs +++ b/crates/curp/src/client/connect.rs @@ -3,7 +3,7 @@ use curp_external_api::cmd::Command; use crate::{ members::ServerId, - rpc::{FetchMembershipResponse, Node, ReadState}, + rpc::{Change, FetchMembershipResponse, ReadState}, }; use super::retry::Context; @@ -59,17 +59,8 @@ pub trait ClientApi { .map(|resp| resp.leader_id) } - /// Add some learners to the cluster. - async fn add_learner(&self, nodes: Vec) -> Result<(), Self::Error>; - - /// Remove some learners from the cluster. - async fn remove_learner(&self, ids: Vec) -> Result<(), Self::Error>; - - /// Add some members to the cluster. - async fn add_member(&self, ids: Vec) -> Result<(), Self::Error>; - - /// Add some members to the cluster. - async fn remove_member(&self, ids: Vec) -> Result<(), Self::Error>; + /// Performs membership change + async fn change_membership(&self, changes: Vec) -> Result<(), Self::Error>; } /// This trait override some unrepeatable methods in ClientApi, and a client with this trait will be able to retry. @@ -104,15 +95,10 @@ pub(crate) trait RepeatableClientApi { ctx: Context, ) -> Result; - /// Add some learners to the cluster. - async fn add_learner(&self, nodes: Vec, ctx: Context) -> Result<(), Self::Error>; - - /// Remove some learners from the cluster. - async fn remove_learner(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error>; - - /// Add some members to the cluster. - async fn add_member(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error>; - - /// Remove some members from the cluster. 
- async fn remove_member(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error>; + /// Performs membership change + async fn change_membership( + &self, + changes: Vec, + ctx: Context, + ) -> Result<(), Self::Error>; } diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index c7e597aea..d7ea6cfb6 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -155,14 +155,11 @@ mod tests { use crate::{ member::Membership, rpc::{ - connect::{ConnectApi, MockConnectApi}, - AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, CurpError, + connect::ConnectApi, ChangeMembershipRequest, ChangeMembershipResponse, FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, - FetchReadStateResponse, Member, MoveLeaderRequest, MoveLeaderResponse, NodeMetadata, - OpResponse, ProposeId, ProposeRequest, ProposeResponse, ReadIndexResponse, - RecordRequest, RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, - RemoveMemberRequest, RemoveMemberResponse, ResponseOp, ShutdownRequest, - ShutdownResponse, SyncedResponse, + FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, NodeMetadata, + OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, RecordResponse, + ShutdownRequest, ShutdownResponse, }, }; @@ -258,37 +255,11 @@ mod tests { unreachable!("please use MockedConnectApi") } - async fn add_learner( + async fn change_membership( &self, - _request: AddLearnerRequest, + _request: ChangeMembershipRequest, _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - - async fn remove_learner( - &self, - _request: RemoveLearnerRequest, - _timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - - /// Add a learner to the cluster. 
- async fn add_member( - &self, - request: AddMemberRequest, - timeout: Duration, - ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") - } - - /// Remove a learner from the cluster. - async fn remove_member( - &self, - request: RemoveMemberRequest, - timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { unreachable!("please use MockedConnectApi") } } diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index aa609ee23..653f7204a 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -19,7 +19,7 @@ use super::{ }; use crate::{ members::ServerId, - rpc::{CurpError, ReadState, Redirect, ProposeId, FetchMembershipResponse, NodeMetadata, Node}, tracker::Tracker, + rpc::{CurpError, ReadState, Redirect, ProposeId, FetchMembershipResponse, NodeMetadata, Node, Change}, tracker::Tracker, }; /// Backoff config @@ -433,29 +433,13 @@ where .await } - /// Add some learners to the cluster. - async fn add_learner(&self, nodes: Vec) -> Result<(), Self::Error> { - self.retry::<_, _>(|client, ctx| client.add_learner(nodes.clone(), ctx)) - .await - } - /// Remove some learners from the cluster. - async fn remove_learner(&self, ids: Vec) -> Result<(), Self::Error> { - self.retry::<_, _>(|client, ctx| client.remove_learner(ids.clone(), ctx)) + /// Performs membership change + async fn change_membership(&self, changes: Vec) -> Result<(), Self::Error> { + self.retry::<_, _>(|client, ctx| client.change_membership(changes.clone(), ctx)) .await } - /// Add some members to the cluster. - async fn add_member(&self, ids: Vec) -> Result<(), Self::Error> { - self.retry::<_, _>(|client, ctx| client.add_member(ids.clone(), ctx)) - .await - } - - /// Add some members to the cluster. 
- async fn remove_member(&self, ids: Vec) -> Result<(), Self::Error> { - self.retry::<_, _>(|client, ctx| client.remove_member(ids.clone(), ctx)) - .await - } } /// Tests for backoff diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 5a17be986..5285237cd 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -13,8 +13,8 @@ use super::{ retry::Context, }; use crate::rpc::{ - AddLearnerRequest, AddMemberRequest, CurpError, FetchReadStateRequest, MoveLeaderRequest, Node, - ReadState, RemoveLearnerRequest, RemoveMemberRequest, ShutdownRequest, + Change, ChangeMembershipRequest, CurpError, FetchReadStateRequest, MembershipChange, + MoveLeaderRequest, ReadState, ShutdownRequest, }; /// The unary client @@ -109,49 +109,20 @@ impl RepeatableClientApi for Unary { Ok(state) } - /// Add some learners to the cluster. - async fn add_learner(&self, nodes: Vec, ctx: Context) -> Result<(), Self::Error> { - let req = AddLearnerRequest { nodes }; + async fn change_membership( + &self, + changes: Vec, + ctx: Context, + ) -> Result<(), Self::Error> { + let changes = changes + .into_iter() + .map(|c| MembershipChange { change: Some(c) }) + .collect(); + let req = ChangeMembershipRequest { changes }; let timeout = self.config.wait_synced_timeout(); let _ignore = ctx .cluster_state() - .map_leader(|conn| async move { conn.add_learner(req, timeout).await }) - .await?; - - Ok(()) - } - - /// Remove some learners from the cluster. - async fn remove_learner(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error> { - let req = RemoveLearnerRequest { node_ids: ids }; - let timeout = self.config.wait_synced_timeout(); - let _ig = ctx - .cluster_state() - .map_leader(|conn| async move { conn.remove_learner(req, timeout).await }) - .await?; - - Ok(()) - } - - /// Add some members to the cluster. 
- async fn add_member(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error> { - let req = AddMemberRequest { node_ids: ids }; - let timeout = self.config.wait_synced_timeout(); - let _ig = ctx - .cluster_state() - .map_leader(|conn| async move { conn.add_member(req, timeout).await }) - .await?; - - Ok(()) - } - - /// Add some members to the cluster. - async fn remove_member(&self, ids: Vec, ctx: Context) -> Result<(), Self::Error> { - let req = RemoveMemberRequest { node_ids: ids }; - let timeout = self.config.wait_synced_timeout(); - let _ig = ctx - .cluster_state() - .map_leader(|conn| async move { conn.remove_member(req, timeout).await }) + .map_leader(|conn| async move { conn.change_membership(req, timeout).await }) .await?; Ok(()) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index d0a75cb7d..78686e62f 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -1,4 +1,3 @@ -use std::collections::btree_map::Entry; use std::collections::hash_map::DefaultHasher; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -12,6 +11,7 @@ use serde::Serialize; use crate::quorum::Joint; use crate::quorum::QuorumSet; +use crate::rpc::Change; use crate::rpc::NodeMetadata; /// The membership info, used to build the initial states @@ -186,54 +186,48 @@ impl Membership { /// Generates a new membership from `Change` /// /// Returns `None` if the change is invalid - pub(crate) fn change(&self, change: Change) -> Vec { - match change { - Change::AddLearner(learners) => { - let members = self.members.clone(); - let mut nodes = self.nodes.clone(); - for (id, meta) in learners { - match nodes.entry(id) { - Entry::Occupied(_) => return vec![], - Entry::Vacant(e) => { - let _ignore = e.insert(meta); - } + pub(crate) fn changes(&self, changes: Changes) -> Vec + where + Changes: IntoIterator, + { + let mut nodes = self.nodes.clone(); + let members = self.members.clone(); + let is_member = |id: &u64| members.iter().any(|s| 
s.contains(id)); + let mut set = self + .members + .last() + .unwrap_or_else(|| unreachable!("there should be at least one member set")) + .clone(); + + for change in changes { + match change { + Change::Add(node) => { + let (id, meta) = node.into_parts(); + if nodes.insert(id, meta).is_some() { + return vec![]; } } - - vec![Self { members, nodes }] - } - Change::RemoveLearner(ids) => { - let members = self.members.clone(); - let mut nodes = self.nodes.clone(); - for id in ids { + Change::Remove(id) => { if nodes.remove(&id).is_none() { return vec![]; } } - - vec![Self { members, nodes }] + Change::Promote(id) => { + if !nodes.contains_key(&id) || is_member(&id) { + return vec![]; + } + let _ignore = set.insert(id); + } + Change::Demote(id) => { + if !nodes.contains_key(&id) || !is_member(&id) { + return vec![]; + } + let _ignore = set.remove(&id); + } } - Change::AddMember(ids) => self.update_members(ids, |i, set| { - set.union(&i.into_iter().collect()).copied().collect() - }), - Change::RemoveMember(ids) => self.update_members(ids, |i, set| { - set.difference(&i.into_iter().collect()).copied().collect() - }), } - } - /// Updates the membership based on the given operation and returns - /// a vector of coherent memberships. 
- fn update_members(&self, ids: Vec, op: F) -> Vec - where - F: FnOnce(Vec, BTreeSet) -> BTreeSet, - { - if !self.exists(&ids) { - return vec![]; - } - let last = self.last_set(); - let target = op(ids, last); - self.all_coherent(&target) + self.all_coherent(&set) } /// Generates all coherent membership to reach the target @@ -255,21 +249,6 @@ impl Membership { } } - /// Returns the last member set - /// - fn last_set(&self) -> BTreeSet { - self.members - .last() - .unwrap_or_else(|| unreachable!("there should be at least one member set")) - .clone() - } - - /// Validates the given ids for member operations - fn exists(&self, ids: &[u64]) -> bool { - // Ids should be in nodes - ids.iter().all(|id| self.nodes.contains_key(id)) - } - /// Converts to `Joint` pub(crate) fn as_joint(&self) -> Joint, &[BTreeSet]> { Joint::new(self.members.as_slice()) @@ -296,20 +275,6 @@ impl Membership { } } -#[allow(unused)] -/// The change of membership -#[derive(Clone)] -pub(crate) enum Change { - /// Adds learners - AddLearner(Vec<(u64, NodeMetadata)>), - /// Removes learners - RemoveLearner(Vec), - /// Adds members - AddMember(Vec), - /// Removes members - RemoveMember(Vec), -} - /// Trait for types that can provide a cluster ID. pub trait ClusterId { /// Returns the cluster ID. 
diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index e4e4381f7..af1c7a528 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -37,10 +37,9 @@ use crate::{ commandpb::protocol_client::ProtocolClient, inner_messagepb::inner_protocol_client::InnerProtocolClient, }, - AddMemberRequest, AddMemberResponse, AppendEntriesRequest, AppendEntriesResponse, - CurpError, FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotRequest, - InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, - ProposeRequest, Protocol, RemoveMemberRequest, RemoveMemberResponse, ShutdownRequest, + AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchReadStateRequest, + FetchReadStateResponse, InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, + MoveLeaderRequest, MoveLeaderResponse, ProposeRequest, Protocol, ShutdownRequest, ShutdownResponse, TriggerShutdownRequest, TryBecomeLeaderNowRequest, VoteRequest, VoteResponse, }, @@ -51,8 +50,8 @@ use crate::{ use super::{ proto::commandpb::{ReadIndexRequest, ReadIndexResponse}, reconnect::Reconnect, - AddLearnerRequest, AddLearnerResponse, FetchMembershipRequest, FetchMembershipResponse, - OpResponse, RecordRequest, RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, + ChangeMembershipRequest, ChangeMembershipResponse, FetchMembershipRequest, + FetchMembershipResponse, OpResponse, RecordRequest, RecordResponse, }; /// Install snapshot chunk size: 64KB @@ -224,33 +223,12 @@ pub(crate) trait ConnectApi: Send + Sync + 'static { timeout: Duration, ) -> Result, CurpError>; - /// Add a learner to the cluster. - async fn add_learner( + /// Changes the membership + async fn change_membership( &self, - request: AddLearnerRequest, + request: ChangeMembershipRequest, timeout: Duration, - ) -> Result, CurpError>; - - /// Remove a learner from the cluster. 
- async fn remove_learner( - &self, - request: RemoveLearnerRequest, - timeout: Duration, - ) -> Result, CurpError>; - - /// Add a learner to the cluster. - async fn add_member( - &self, - request: AddMemberRequest, - timeout: Duration, - ) -> Result, CurpError>; - - /// Remove a learner from the cluster. - async fn remove_member( - &self, - request: RemoveMemberRequest, - timeout: Duration, - ) -> Result, CurpError>; + ) -> Result, CurpError>; } /// Inner Connect interface among different servers @@ -522,49 +500,14 @@ impl ConnectApi for Connect> { with_timeout!(timeout, client.fetch_membership(req)).map_err(Into::into) } - async fn add_learner( - &self, - request: AddLearnerRequest, - timeout: Duration, - ) -> Result, CurpError> { - let mut client = self.rpc_connect.clone(); - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_current(); - with_timeout!(timeout, client.add_learner(req)).map_err(Into::into) - } - - async fn remove_learner( - &self, - request: RemoveLearnerRequest, - timeout: Duration, - ) -> Result, CurpError> { - let mut client = self.rpc_connect.clone(); - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_current(); - with_timeout!(timeout, client.remove_learner(req)).map_err(Into::into) - } - - async fn add_member( - &self, - request: AddMemberRequest, - timeout: Duration, - ) -> Result, CurpError> { - let mut client = self.rpc_connect.clone(); - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_current(); - with_timeout!(timeout, client.add_member(req)).map_err(Into::into) - } - - /// Remove a learner from the cluster. 
- async fn remove_member( + async fn change_membership( &self, - request: RemoveMemberRequest, + request: ChangeMembershipRequest, timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { let mut client = self.rpc_connect.clone(); - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_current(); - with_timeout!(timeout, client.remove_member(req)).map_err(Into::into) + let req = tonic::Request::new(request); + with_timeout!(timeout, client.change_membership(req)).map_err(Into::into) } } @@ -833,49 +776,15 @@ where self.server.fetch_membership(req).await.map_err(Into::into) } - async fn add_learner( - &self, - request: AddLearnerRequest, - _timeout: Duration, - ) -> Result, CurpError> { - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_bypassed(); - req.metadata_mut().inject_current(); - self.server.add_learner(req).await.map_err(Into::into) - } - - async fn remove_learner( - &self, - request: RemoveLearnerRequest, - _timeout: Duration, - ) -> Result, CurpError> { - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_bypassed(); - req.metadata_mut().inject_current(); - self.server.remove_learner(req).await.map_err(Into::into) - } - - async fn add_member( - &self, - request: AddMemberRequest, - _timeout: Duration, - ) -> Result, CurpError> { - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_bypassed(); - req.metadata_mut().inject_current(); - self.server.add_member(req).await.map_err(Into::into) - } - - /// Remove a learner from the cluster. 
- async fn remove_member( + async fn change_membership( &self, - request: RemoveMemberRequest, + request: ChangeMembershipRequest, _timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { let mut req = tonic::Request::new(request); req.metadata_mut().inject_bypassed(); req.metadata_mut().inject_current(); - self.server.remove_member(req).await.map_err(Into::into) + self.server.change_membership(req).await.map_err(Into::into) } } diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index edc20033f..85dae21af 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -19,6 +19,7 @@ pub(crate) use self::proto::{ }; pub use self::proto::{ commandpb::{ + change_membership_request::{membership_change::Change, MembershipChange}, cmd_result::Result as CmdResultInner, curp_error::Err as CurpError, // easy for match curp_error::Redirect, @@ -26,10 +27,8 @@ pub use self::proto::{ op_response::Op as ResponseOp, protocol_client, protocol_server::{Protocol, ProtocolServer}, - AddLearnerRequest, - AddLearnerResponse, - AddMemberRequest, - AddMemberResponse, + ChangeMembershipRequest, + ChangeMembershipResponse, CmdResult, FetchMembershipRequest, FetchMembershipResponse, @@ -51,10 +50,6 @@ pub use self::proto::{ ReadIndexResponse, RecordRequest, RecordResponse, - RemoveLearnerRequest, - RemoveLearnerResponse, - RemoveMemberRequest, - RemoveMemberResponse, ShutdownRequest, ShutdownResponse, SyncedResponse, @@ -888,3 +883,13 @@ impl NodeMetadata { self.client_urls } } + +impl MembershipChange { + /// Consumes the wrapper and returns the inner `Change`. 
+ #[allow(clippy::unwrap_used, clippy::missing_panics_doc)] + #[inline] + #[must_use] + pub fn into_inner(self) -> Change { + self.change.unwrap() + } +} diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs index 2d6843c28..2e689be46 100644 --- a/crates/curp/src/rpc/reconnect.rs +++ b/crates/curp/src/rpc/reconnect.rs @@ -7,12 +7,10 @@ use futures::Stream; use crate::{ members::ServerId, rpc::{ - connect::ConnectApi, AddLearnerRequest, AddLearnerResponse, AddMemberRequest, - AddMemberResponse, CurpError, FetchMembershipRequest, FetchMembershipResponse, - FetchReadStateRequest, FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, - OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, RecordResponse, - RemoveLearnerRequest, RemoveLearnerResponse, RemoveMemberRequest, RemoveMemberResponse, - ShutdownRequest, ShutdownResponse, + connect::ConnectApi, ChangeMembershipRequest, ChangeMembershipResponse, CurpError, + FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, + FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeRequest, + ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, }, }; @@ -165,39 +163,12 @@ impl ConnectApi for Reconnect { execute_with_reconnect!(self, ConnectApi::fetch_membership, request, timeout) } - /// Add a learner to the cluster. - async fn add_learner( + /// Changes the membership + async fn change_membership( &self, - request: AddLearnerRequest, + request: ChangeMembershipRequest, timeout: Duration, - ) -> Result, CurpError> { - execute_with_reconnect!(self, ConnectApi::add_learner, request, timeout) - } - - /// Remove a learner from the cluster. - async fn remove_learner( - &self, - request: RemoveLearnerRequest, - timeout: Duration, - ) -> Result, CurpError> { - execute_with_reconnect!(self, ConnectApi::remove_learner, request, timeout) - } - - /// Add a learner to the cluster. 
- async fn add_member( - &self, - request: AddMemberRequest, - timeout: Duration, - ) -> Result, CurpError> { - execute_with_reconnect!(self, ConnectApi::add_member, request, timeout) - } - - /// Remove a learner from the cluster. - async fn remove_member( - &self, - request: RemoveMemberRequest, - timeout: Duration, - ) -> Result, CurpError> { - execute_with_reconnect!(self, ConnectApi::remove_member, request, timeout) + ) -> Result, CurpError> { + execute_with_reconnect!(self, ConnectApi::change_membership, request, timeout) } } diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 3cd731f89..24ccbbbb1 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -4,6 +4,7 @@ clippy::needless_pass_by_value )] // TODO: remove this after implemented +use std::collections::HashSet; use std::sync::Arc; use curp_external_api::cmd::Command; @@ -11,74 +12,55 @@ use curp_external_api::cmd::CommandExecutor; use curp_external_api::role_change::RoleChange; use utils::task_manager::tasks::TaskName; -use crate::member::Change; -use crate::rpc::AddLearnerRequest; -use crate::rpc::AddLearnerResponse; -use crate::rpc::AddMemberRequest; -use crate::rpc::AddMemberResponse; +use super::CurpNode; +use crate::member::Membership; +use crate::rpc::Change; +use crate::rpc::ChangeMembershipRequest; +use crate::rpc::ChangeMembershipResponse; use crate::rpc::CurpError; -use crate::rpc::Node; +use crate::rpc::MembershipChange; use crate::rpc::Redirect; -use crate::rpc::RemoveLearnerRequest; -use crate::rpc::RemoveLearnerResponse; -use crate::rpc::RemoveMemberRequest; -use crate::rpc::RemoveMemberResponse; - -use super::CurpNode; impl, RC: RoleChange> CurpNode { - /// Adds a learner to the cluster - pub(crate) async fn add_learner( + /// Performs a membership change to the cluster + pub(crate) async fn change_membership( &self, - request: AddLearnerRequest, - ) -> Result { + 
request: ChangeMembershipRequest, + ) -> Result { self.ensure_leader()?; - let nodes = request.nodes.into_iter().map(Node::into_parts).collect(); - self.update_and_wait(Change::AddLearner(nodes)).await?; + let changes = request + .changes + .into_iter() + .map(MembershipChange::into_inner); + let changes = Self::ensure_non_overlapping(changes)?; + let configs = self.curp.generate_membership(changes); + self.update_and_wait(configs).await?; - Ok(AddLearnerResponse {}) + Ok(ChangeMembershipResponse {}) } - /// Removes a learner from the cluster - pub(crate) async fn remove_learner( - &self, - request: RemoveLearnerRequest, - ) -> Result { - self.ensure_leader()?; - self.update_and_wait(Change::RemoveLearner(request.node_ids)) - .await?; - - Ok(RemoveLearnerResponse {}) - } + /// Ensures there are no overlapping ids + fn ensure_non_overlapping(changes: Changes) -> Result, CurpError> + where + Changes: IntoIterator, + { + let changes: Vec<_> = changes.into_iter().collect(); + let mut ids = changes.iter().map(|c| match *c { + Change::Add(ref node) => node.node_id, + Change::Remove(id) | Change::Promote(id) | Change::Demote(id) => id, + }); - /// Promotes a learner to a member - pub(crate) async fn add_member( - &self, - request: AddMemberRequest, - ) -> Result { - self.ensure_leader()?; - self.update_and_wait(Change::AddMember(request.node_ids)) - .await?; - - Ok(AddMemberResponse {}) - } - - /// Demotes a member to a learner - pub(crate) async fn remove_member( - &self, - request: RemoveMemberRequest, - ) -> Result { - self.ensure_leader()?; - self.update_and_wait(Change::RemoveMember(request.node_ids)) - .await?; + let mut set = HashSet::new(); + if ids.all(|id| set.insert(id)) { + return Ok(changes); + } - Ok(RemoveMemberResponse {}) + Err(CurpError::InvalidConfig(())) } /// Updates the membership based on the given change and waits for /// the proposal to be committed - async fn update_and_wait(&self, change: Change) -> Result<(), CurpError> { - let configs = 
self.curp.generate_membership(change.clone()); + async fn update_and_wait(&self, configs: Vec) -> Result<(), CurpError> { if configs.is_empty() { return Err(CurpError::invalid_member_change()); } diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 69b289cab..4b729111f 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -23,10 +23,10 @@ use crate::member::MembershipInfo; use crate::response::ResponseSender; use crate::role_change::RoleChange; use crate::rpc::connect::Bypass; -use crate::rpc::AddLearnerRequest; -use crate::rpc::AddLearnerResponse; use crate::rpc::AppendEntriesRequest; use crate::rpc::AppendEntriesResponse; +use crate::rpc::ChangeMembershipRequest; +use crate::rpc::ChangeMembershipResponse; use crate::rpc::FetchMembershipRequest; use crate::rpc::FetchMembershipResponse; use crate::rpc::FetchReadStateRequest; @@ -42,8 +42,6 @@ use crate::rpc::ReadIndexRequest; use crate::rpc::ReadIndexResponse; use crate::rpc::RecordRequest; use crate::rpc::RecordResponse; -use crate::rpc::RemoveLearnerRequest; -use crate::rpc::RemoveLearnerResponse; use crate::rpc::ShutdownRequest; use crate::rpc::ShutdownResponse; use crate::rpc::TriggerShutdownRequest; @@ -194,49 +192,13 @@ impl, RC: RoleChange> crate::rpc::Protocol fo )) } - #[instrument(skip_all, name = "add_learner")] - async fn add_learner( + #[instrument(skip_all, name = "change_membership")] + async fn change_membership( &self, - request: tonic::Request, - ) -> Result, tonic::Status> { + request: tonic::Request, + ) -> Result, tonic::Status> { self.inner - .add_learner(request.into_inner()) - .await - .map(tonic::Response::new) - .map_err(Into::into) - } - - #[instrument(skip_all, name = "remove_learner")] - async fn remove_learner( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - self.inner - .remove_learner(request.into_inner()) - .await - .map(tonic::Response::new) - .map_err(Into::into) - } - - #[instrument(skip_all, name = 
"add_member")] - async fn add_member( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - self.inner - .add_member(request.into_inner()) - .await - .map(tonic::Response::new) - .map_err(Into::into) - } - - #[instrument(skip_all, name = "remove_member")] - async fn remove_member( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - self.inner - .remove_member(request.into_inner()) + .change_membership(request.into_inner()) .await .map(tonic::Response::new) .map_err(Into::into) diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index c0040621a..94527dea9 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -10,11 +10,11 @@ use utils::parking_lot_lock::RwLockMap; use crate::log_entry::EntryData; use crate::log_entry::LogEntry; -use crate::member::Change; use crate::member::Membership; use crate::member::NodeMembershipState; use crate::rpc::connect::InnerConnectApiWrapper; use crate::rpc::inner_connects; +use crate::rpc::Change; use crate::rpc::ProposeId; use crate::server::StorageApi; use crate::server::StorageError; @@ -24,9 +24,12 @@ use super::Role; impl RawCurp { /// Generate memberships based on the provided change - pub(crate) fn generate_membership(&self, change: Change) -> Vec { + pub(crate) fn generate_membership(&self, changes: Changes) -> Vec + where + Changes: IntoIterator, + { let ms_r = self.ms.read(); - ms_r.cluster().committed().change(change) + ms_r.cluster().committed().changes(changes) } /// Updates the membership config diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index afbadd1f8..a07098d20 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -9,8 +9,7 @@ use utils::config::{ use super::*; use crate::{ - member::Change, - rpc::{self, NodeMetadata, Redirect}, + rpc::{self, Change, Node, 
NodeMetadata, Redirect}, server::{ cmd_board::CommandBoard, conflict::test_pools::{TestSpecPool, TestUncomPool}, @@ -703,7 +702,7 @@ async fn add_learner_node_and_promote_should_success() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let membership = curp - .generate_membership(Change::AddLearner(vec![(3, NodeMetadata::default())])) + .generate_membership(Some(Change::Add(Node::new(3, NodeMetadata::default())))) .pop() .unwrap(); let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); @@ -715,7 +714,7 @@ async fn add_learner_node_and_promote_should_success() { .any(|id| *id == 3)); curp.membership_commit_to(1); let membership = curp - .generate_membership(Change::AddMember(vec![3])) + .generate_membership(Some(Change::Promote(3))) .pop() .unwrap(); let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); @@ -734,13 +733,13 @@ fn add_exists_node_should_have_no_effect() { let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let exists_node_id = curp.get_id_by_name("S1").unwrap(); assert!(curp - .generate_membership(Change::AddLearner(vec![( + .generate_membership(Some(Change::Add(Node::new( exists_node_id, NodeMetadata::default(), - )])) + )))) .is_empty()); assert!(curp - .generate_membership(Change::AddMember(vec![exists_node_id])) + .generate_membership(Some(Change::Promote(exists_node_id))) .is_empty()); } @@ -751,7 +750,7 @@ async fn remove_node_should_remove_node_from_curp() { let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; let follower_id = curp.get_id_by_name("S1").unwrap(); let membership = curp - .generate_membership(Change::RemoveMember(vec![follower_id])) + .generate_membership(Some(Change::Demote(follower_id))) .pop() .unwrap(); let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); @@ -770,10 +769,10 @@ fn remove_non_exists_node_should_have_no_effect() { let 
task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; assert!(curp - .generate_membership(Change::RemoveLearner(vec![10])) + .generate_membership(Some(Change::Remove(10))) .is_empty()); assert!(curp - .generate_membership(Change::RemoveMember(vec![10])) + .generate_membership(Some(Change::Demote(10))) .is_empty()); } @@ -783,7 +782,7 @@ async fn follower_append_membership_change() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let membership = curp - .generate_membership(Change::AddLearner(vec![(3, NodeMetadata::default())])) + .generate_membership(Some(Change::Add(Node::new(3, NodeMetadata::default())))) .pop() .unwrap(); @@ -804,7 +803,7 @@ async fn leader_handle_move_leader() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let membership = curp - .generate_membership(Change::AddLearner(vec![(1234, NodeMetadata::default())])) + .generate_membership(Some(Change::Add(Node::new(1234, NodeMetadata::default())))) .pop() .unwrap(); let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index 56fe2d4ef..068a0403c 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -9,7 +9,7 @@ use clippy_utilities::NumericCast; use curp::{ client::{ClientApi, ClientBuilder}, member::MembershipInfo, - rpc::{CurpError, Node, NodeMetadata}, + rpc::{Change, CurpError, Node, NodeMetadata}, }; use curp_test_utils::{ init_logger, sleep_millis, @@ -396,7 +396,10 @@ async fn propose_add_node_should_success() { let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); let node_id = 5; let node = Node::new(node_id, node_meta.clone()); - client.add_learner(vec![node]).await.unwrap(); + client + 
.change_membership(vec![Change::Add(node)]) + .await + .unwrap(); assert_cluster(&client, 4, 3, [NodeAssert::new(node_id, node_meta, false)]).await; } @@ -411,10 +414,16 @@ async fn propose_remove_node_should_success() { let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); let node_id = 5; let node = Node::new(node_id, node_meta.clone()); - client.add_learner(vec![node]).await.unwrap(); + client + .change_membership(vec![Change::Add(node)]) + .await + .unwrap(); assert_cluster(&client, 4, 3, [NodeAssert::new(node_id, node_meta, false)]).await; - client.remove_learner(vec![node_id]).await.unwrap(); + client + .change_membership(vec![Change::Remove(node_id)]) + .await + .unwrap(); assert_cluster(&client, 3, 3, []).await; } @@ -429,10 +438,16 @@ async fn propose_add_member_should_success() { let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); let node_id = 5; let node = Node::new(node_id, node_meta.clone()); - client.add_learner(vec![node]).await.unwrap(); + client + .change_membership(vec![Change::Add(node)]) + .await + .unwrap(); assert_cluster(&client, 4, 3, [NodeAssert::new(node_id, node_meta, false)]).await; - client.add_member(vec![node_id]).await.unwrap(); + client + .change_membership(vec![Change::Promote(node_id)]) + .await + .unwrap(); assert_cluster(&client, 4, 4, []).await; } @@ -447,16 +462,28 @@ async fn propose_remove_member_should_success() { let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); let node_id = 5; let node = Node::new(node_id, node_meta.clone()); - client.add_learner(vec![node]).await.unwrap(); + client + .change_membership(vec![Change::Add(node)]) + .await + .unwrap(); assert_cluster(&client, 4, 3, [NodeAssert::new(node_id, node_meta, false)]).await; - client.add_member(vec![node_id]).await.unwrap(); + client + .change_membership(vec![Change::Promote(node_id)]) + .await + .unwrap(); assert_cluster(&client, 4, 4, []).await; - client.remove_member(vec![node_id]).await.unwrap(); + client + 
.change_membership(vec![Change::Demote(node_id)]) + .await + .unwrap(); assert_cluster(&client, 4, 3, []).await; - client.remove_learner(vec![node_id]).await.unwrap(); + client + .change_membership(vec![Change::Remove(node_id)]) + .await + .unwrap(); assert_cluster(&client, 3, 3, []).await; } @@ -470,10 +497,16 @@ async fn propose_remove_leader_should_success() { let id = client.fetch_leader_id(true).await.unwrap(); - client.remove_member(vec![id]).await.unwrap(); + client + .change_membership(vec![Change::Demote(id)]) + .await + .unwrap(); assert_cluster(&client, 3, 2, []).await; - client.remove_learner(vec![id]).await.unwrap(); + client + .change_membership(vec![Change::Remove(id)]) + .await + .unwrap(); assert_cluster(&client, 2, 2, []).await; let new_id = client.fetch_leader_id(true).await.unwrap(); @@ -519,7 +552,10 @@ async fn propose_conf_change_to_follower() { let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); let node_id = 5; let node = Node::new(node_id, node_meta.clone()); - client.add_learner(vec![node]).await.unwrap(); + client + .change_membership(vec![Change::Add(node)]) + .await + .unwrap(); assert_cluster(&client, 4, 3, [NodeAssert::new(node_id, node_meta, false)]).await; } @@ -539,7 +575,10 @@ async fn new_node_should_apply_old_cluster_logs() { let node_meta = NodeMetadata::new("new_node", addrs.clone(), addrs); let node_id = 5; let node = Node::new(node_id, node_meta.clone()); - client.add_learner(vec![node]).await.unwrap(); + client + .change_membership(vec![Change::Add(node)]) + .await + .unwrap(); /******* start new node *******/ @@ -606,7 +645,10 @@ async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_cluster( let node_meta = NodeMetadata::new("new_node", addrs.clone(), addrs); let node_id = 5; let node = Node::new(node_id, node_meta); - client.add_learner(vec![node]).await.unwrap(); + client + .change_membership(vec![Change::Add(node)]) + .await + .unwrap(); group .run_node( listener, @@ -635,7 +677,10 @@ 
async fn propose_conf_change_rpc_should_work_when_client_has_wrong_cluster() { let node_meta = NodeMetadata::new("new_node", addrs.clone(), addrs); let node_id = 5; let node = Node::new(node_id, node_meta); - client.add_learner(vec![node]).await.unwrap(); + client + .change_membership(vec![Change::Add(node)]) + .await + .unwrap(); group .run_node( listener, @@ -643,7 +688,10 @@ async fn propose_conf_change_rpc_should_work_when_client_has_wrong_cluster() { MembershipInfo::new(node_id, BTreeMap::default()), ) .await; - client.remove_member(vec![node_id]).await.unwrap(); + client + .change_membership(vec![Change::Remove(node_id)]) + .await + .unwrap(); group .wait_for_node_shutdown(node_id, DEFAULT_SHUTDOWN_TIMEOUT) .await; diff --git a/crates/xline-client/src/clients/member.rs b/crates/xline-client/src/clients/member.rs index 030e7a25c..590a0c746 100644 --- a/crates/xline-client/src/clients/member.rs +++ b/crates/xline-client/src/clients/member.rs @@ -50,8 +50,9 @@ impl MemberClient { /// ``` #[inline] pub async fn add_learner(&self, nodes: Vec) -> Result<()> { + let changes = nodes.into_iter().map(Change::Add).map(Into::into).collect(); self.curp_client - .add_learner(nodes.into_iter().map(Into::into).collect()) + .change_membership(changes) .await .map_err(Into::into) } @@ -84,8 +85,13 @@ impl MemberClient { /// ``` #[inline] pub async fn remove_learner(&self, ids: Vec) -> Result<()> { + let changes = ids + .into_iter() + .map(Change::Remove) + .map(Into::into) + .collect(); self.curp_client - .remove_learner(ids) + .change_membership(changes) .await .map_err(Into::into) } @@ -151,3 +157,30 @@ impl From for curp::rpc::Node { } } } + +/// Represents a change in cluster membership. +#[allow(variant_size_differences)] +#[derive(Clone, Debug)] +#[non_exhaustive] +pub enum Change { + /// Adds a new learner. + Add(Node), + /// Removes a learner by its id. + Remove(u64), + /// Promotes a learner to voter + Promote(u64), + /// Demotes a voter to learner. 
+ Demote(u64), +} + +impl From for curp::rpc::Change { + #[inline] + fn from(change: Change) -> Self { + match change { + Change::Add(node) => curp::rpc::Change::Add(node.into()), + Change::Remove(id) => curp::rpc::Change::Remove(id), + Change::Promote(id) => curp::rpc::Change::Promote(id), + Change::Demote(id) => curp::rpc::Change::Demote(id), + } + } +} diff --git a/crates/xline/src/server/auth_wrapper.rs b/crates/xline/src/server/auth_wrapper.rs index 72c3bba6a..adaba1ef2 100644 --- a/crates/xline/src/server/auth_wrapper.rs +++ b/crates/xline/src/server/auth_wrapper.rs @@ -3,12 +3,11 @@ use std::sync::Arc; use curp::{ cmd::PbCodec, rpc::{ - AddLearnerRequest, AddLearnerResponse, AddMemberRequest, AddMemberResponse, - FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, - FetchReadStateResponse, LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, - OpResponse, ProposeRequest, Protocol, ReadIndexRequest, ReadIndexResponse, RecordRequest, - RecordResponse, RemoveLearnerRequest, RemoveLearnerResponse, RemoveMemberRequest, - RemoveMemberResponse, ShutdownRequest, ShutdownResponse, + ChangeMembershipRequest, ChangeMembershipResponse, FetchMembershipRequest, + FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, LeaseKeepAliveMsg, + MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeRequest, Protocol, + ReadIndexRequest, ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, + ShutdownResponse, }, }; use flume::r#async::RecvStream; @@ -109,31 +108,10 @@ impl Protocol for AuthWrapper { self.curp_server.fetch_membership(request).await } - async fn add_learner( + async fn change_membership( &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - self.curp_server.add_learner(request).await - } - - async fn remove_learner( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - self.curp_server.remove_learner(request).await - } - - async fn add_member( - &self, - request: 
tonic::Request, - ) -> Result, tonic::Status> { - self.curp_server.add_member(request).await - } - - async fn remove_member( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - self.curp_server.remove_member(request).await + request: tonic::Request, + ) -> Result, tonic::Status> { + self.curp_server.change_membership(request).await } } diff --git a/crates/xline/src/server/cluster_server.rs b/crates/xline/src/server/cluster_server.rs index f4e91ac35..19393994e 100644 --- a/crates/xline/src/server/cluster_server.rs +++ b/crates/xline/src/server/cluster_server.rs @@ -1,6 +1,6 @@ use std::{collections::BTreeSet, sync::Arc}; -use curp::rpc::{Node, NodeMetadata}; +use curp::rpc::{Change, Node, NodeMetadata}; use rand::Rng; use tonic::{Request, Response, Status}; use xlineapi::{ @@ -70,9 +70,13 @@ impl Cluster for ClusterServer { let id = Self::gen_rand_node_id(); let meta = NodeMetadata::new(name, request.peer_ur_ls, vec![]); let node = Node::new(id, meta); - self.client.add_learner(vec![node]).await?; + self.client + .change_membership(vec![Change::Add(node)]) + .await?; if !request.is_learner { - self.client.add_member(vec![id]).await?; + self.client + .change_membership(vec![Change::Promote(id)]) + .await?; } let members = self.fetch_members(true).await?; let added = members @@ -96,8 +100,13 @@ impl Cluster for ClusterServer { let id = request.into_inner().id; // In etcd a member could be a learner, and could return CurpError::InvalidMemberChange // TODO: handle other errors that may returned - let _ignore = self.client.remove_member(vec![id]).await; - self.client.remove_learner(vec![id]).await?; + let _ignore = self + .client + .change_membership(vec![Change::Demote(id)]) + .await; + self.client + .change_membership(vec![Change::Remove(id)]) + .await?; let members = self.fetch_members(true).await?; Ok(tonic::Response::new(MemberRemoveResponse { @@ -120,9 +129,13 @@ impl Cluster for ClusterServer { .ok_or(tonic::Status::internal("invalid member 
id"))?; if !member.is_learner { - self.client.remove_member(vec![id]).await?; + self.client + .change_membership(vec![Change::Demote(id)]) + .await?; } - self.client.remove_learner(vec![id]).await?; + self.client + .change_membership(vec![Change::Remove(id)]) + .await?; let meta = NodeMetadata::new( member.name.clone(), @@ -130,8 +143,12 @@ impl Cluster for ClusterServer { member.client_ur_ls.clone(), ); let node = Node::new(id, meta); - self.client.add_learner(vec![node]).await?; - self.client.add_member(vec![id]).await?; + self.client + .change_membership(vec![Change::Add(node)]) + .await?; + self.client + .change_membership(vec![Change::Promote(id)]) + .await?; member.peer_ur_ls = request.peer_ur_ls; @@ -161,7 +178,7 @@ impl Cluster for ClusterServer { ) -> Result, Status> { let header = self.header_gen.gen_header(); self.client - .add_member(vec![request.into_inner().id]) + .change_membership(vec![Change::Promote(request.into_inner().id)]) .await?; let members = self.fetch_members(true).await?; Ok(tonic::Response::new(MemberPromoteResponse { From 7e745c47d98fcbb50aefa10de17ba137d97c77df Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 24 Sep 2024 15:55:30 +0800 Subject: [PATCH 201/322] refactor: remove notification on `NodeState` drop This change prevents notification when `NodeState` is cloned. 
Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/node_state.rs | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/crates/curp/src/server/raw_curp/node_state.rs b/crates/curp/src/server/raw_curp/node_state.rs index a712cad0a..46072b9da 100644 --- a/crates/curp/src/server/raw_curp/node_state.rs +++ b/crates/curp/src/server/raw_curp/node_state.rs @@ -51,6 +51,10 @@ impl NodeStates { let old_ids: BTreeSet<_> = states_w.keys().copied().collect(); let added: BTreeSet<_> = ids.difference(&old_ids).copied().collect(); let removed: BTreeSet<_> = old_ids.difference(ids).copied().collect(); + removed + .iter() + .filter_map(|id| states_w.remove(id)) + .for_each(|s| s.notify_remove()); states_w.retain(|id, _| !removed.contains(id)); let new_connects = connect_to(&added); let new_states: BTreeMap<_, _> = added @@ -168,7 +172,7 @@ impl NodeStates { /// The state of a node #[derive(Clone, Debug)] -pub(super) struct NodeState { +pub(crate) struct NodeState { /// The status of current node status: NodeStatus, /// The connect to the node @@ -210,11 +214,28 @@ impl NodeState { &self.sync_event } + /// Notify the remove event + pub(super) fn notify_remove(&self) { + let _ignore = self.remove_event.notify(1); + } + /// Get a mutable reference to the status of the current node pub(super) fn status_mut(&mut self) -> &mut NodeStatus { &mut self.status } + /// Decomposes the `NodeState` into its constituent parts. + pub(crate) fn into_parts(self) -> (InnerConnectApiWrapper, Arc, Arc) { + let NodeState { + connect, + sync_event, + remove_event, + .. 
+ } = self; + + (connect, sync_event, remove_event) + } + /// Clone parts of self pub(super) fn clone_parts( &self, @@ -233,9 +254,3 @@ impl NodeState { ) } } - -impl Drop for NodeState { - fn drop(&mut self) { - let _ignore = self.remove_event.notify(1); - } -} From 25d8fc8a4f21fc0107adcd6ca748379330ba53cb Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 24 Sep 2024 15:43:36 +0800 Subject: [PATCH 202/322] refactor: move sync follower task spawning to CurpNode The `RawCurp` should be used in a sync context. Spawning a background task requires a Tokio runtime to be running. Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/member_impl.rs | 32 +++++++---- crates/curp/src/server/curp_node/mod.rs | 15 +----- .../curp/src/server/raw_curp/member_impl.rs | 39 +++++--------- crates/curp/src/server/raw_curp/mod.rs | 18 +++---- crates/curp/src/server/raw_curp/node_state.rs | 18 ------- crates/curp/src/server/raw_curp/tests.rs | 54 +++++++++---------- 6 files changed, 70 insertions(+), 106 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 24ccbbbb1..6eecf6f28 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -4,6 +4,7 @@ clippy::needless_pass_by_value )] // TODO: remove this after implemented +use std::collections::BTreeMap; use std::collections::HashSet; use std::sync::Arc; @@ -20,6 +21,7 @@ use crate::rpc::ChangeMembershipResponse; use crate::rpc::CurpError; use crate::rpc::MembershipChange; use crate::rpc::Redirect; +use crate::server::raw_curp::node_state::NodeState; impl, RC: RoleChange> CurpNode { /// Performs a membership change to the cluster @@ -64,25 +66,33 @@ impl, RC: RoleChange> CurpNode { if configs.is_empty() { return Err(CurpError::invalid_member_change()); } - let spawn_sync = |sync_event, remove_event, connect| { - 
self.curp.task_manager().spawn(TaskName::SyncFollower, |n| { + for config in configs { + let (new_nodes, propose_id) = self.curp.update_membership(config)?; + self.spawn_sync_follower_tasks(new_nodes); + self.curp.wait_propose_ids(Some(propose_id)).await; + } + self.curp.update_role_leader(); + self.curp.update_transferee(); + + Ok(()) + } + + /// Spawns background follower sync tasks + pub(super) fn spawn_sync_follower_tasks(&self, new_nodes: BTreeMap) { + let task_manager = self.curp.task_manager(); + for (connect, sync_event, remove_event) in + new_nodes.into_values().map(NodeState::into_parts) + { + task_manager.spawn(TaskName::SyncFollower, |n| { Self::sync_follower_task( Arc::clone(&self.curp), connect, sync_event, - Arc::clone(&remove_event), + remove_event, n, ) }); - }; - for config in configs { - let propose_id = self.curp.update_membership(config, spawn_sync)?; - self.curp.wait_propose_ids(Some(propose_id)).await; } - self.curp.update_role_leader(); - self.curp.update_transferee(); - - Ok(()) } /// Ensures that the current node is the leader diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 2c84dd8da..118d4ef8b 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -412,17 +412,6 @@ impl, RC: RoleChange> CurpNode { req: &AppendEntriesRequest, ) -> Result { let entries = req.entries()?; - let sync_spawner = |sync_event, remove_event, connect| { - self.curp.task_manager().spawn(TaskName::SyncFollower, |n| { - Self::sync_follower_task( - Arc::clone(&self.curp), - connect, - sync_event, - Arc::clone(&remove_event), - n, - ) - }); - }; let result = self.curp.handle_append_entries( req.term, req.leader_id, @@ -430,12 +419,12 @@ impl, RC: RoleChange> CurpNode { req.prev_log_term, entries, req.leader_commit, - sync_spawner, ); let resp = match result { - Ok((term, to_persist)) => { + Ok((term, to_persist, new_nodes)) => { self.storage 
.put_log_entries(&to_persist.iter().map(Arc::as_ref).collect::>())?; + self.spawn_sync_follower_tasks(new_nodes); AppendEntriesResponse::new_accept(term) } Err((term, hint)) => AppendEntriesResponse::new_reject(term, hint), diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 94527dea9..47692f972 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -1,11 +1,9 @@ use std::collections::BTreeMap; use std::collections::BTreeSet; -use std::sync::Arc; use curp_external_api::cmd::Command; use curp_external_api::role_change::RoleChange; use curp_external_api::LogIndex; -use event_listener::Event; use utils::parking_lot_lock::RwLockMap; use crate::log_entry::EntryData; @@ -19,6 +17,7 @@ use crate::rpc::ProposeId; use crate::server::StorageApi; use crate::server::StorageError; +use super::node_state::NodeState; use super::RawCurp; use super::Role; @@ -33,27 +32,23 @@ impl RawCurp { } /// Updates the membership config - pub(crate) fn update_membership( + pub(crate) fn update_membership( &self, config: Membership, - spawn_sync: F, - ) -> Result - where - F: Fn(Arc, Arc, InnerConnectApiWrapper), - { + ) -> Result<(BTreeMap, ProposeId), StorageError> { // FIXME: define the lock order of log and ms let mut log_w = self.log.write(); let mut ms_w = self.ms.write(); let st_r = self.st.read(); let propose_id = ProposeId(rand::random(), 0); let entry = log_w.push(st_r.term, propose_id, config.clone()); - self.on_membership_update(&config, &spawn_sync); + let new_nodes = self.on_membership_update(&config); ms_w.cluster_mut().append(entry.index, config); self.ctx .curp_storage .put_membership(ms_w.node_id(), ms_w.cluster())?; - Ok(propose_id) + Ok((new_nodes, propose_id)) } /// Updates the role if the node is leader @@ -73,18 +68,17 @@ impl RawCurp { } /// Append membership entries - pub(crate) fn append_membership( + pub(crate) fn append_membership( &self, 
entries: I, truncate_at: LogIndex, commit_index: LogIndex, - spawn_sync: F, - ) -> Result<(), StorageError> + ) -> Result, StorageError> where E: AsRef>, I: IntoIterator, - F: Fn(Arc, Arc, InnerConnectApiWrapper), { + let mut new_nodes = BTreeMap::new(); let mut ms_w = self.ms.write(); ms_w.cluster_mut().truncate(truncate_at); let configs = entries.into_iter().filter_map(|entry| { @@ -96,7 +90,7 @@ impl RawCurp { } }); for (index, config) in configs { - self.on_membership_update(&config, &spawn_sync); + new_nodes.append(&mut self.on_membership_update(&config)); ms_w.cluster_mut().append(index, config); self.ctx .curp_storage @@ -106,7 +100,7 @@ impl RawCurp { self.update_role(&ms_w); - Ok(()) + Ok(new_nodes) } /// Updates the commit index @@ -146,10 +140,9 @@ impl RawCurp { } /// Actions on membership update - fn on_membership_update(&self, membership: &Membership, spawn_sync: F) - where - F: Fn(Arc, Arc, InnerConnectApiWrapper), - { + /// + /// Returns the newly added nodes + fn on_membership_update(&self, membership: &Membership) -> BTreeMap { let node_ids: BTreeSet<_> = membership.nodes.keys().copied().collect(); let new_connects = self.build_connects(membership); let connect_to = move |ids: &BTreeSet| { @@ -157,10 +150,6 @@ impl RawCurp { .filter_map(|id| new_connects.get(id).cloned()) .collect::>() }; - let added = self.ctx.node_states.update_with(&node_ids, connect_to); - for state in added.into_values() { - let (_, connect, sync_event, remove_event) = state.clone_parts(); - spawn_sync(sync_event, remove_event, connect); - } + self.ctx.node_states.update_with(&node_ids, connect_to) } } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 7676b0842..292d43eed 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -50,6 +50,7 @@ use utils::task_manager::TaskManager; use utils::ClientTlsConfig; use self::log::Log; +use self::node_state::NodeState; use 
self::node_state::NodeStates; use self::state::CandidateState; use self::state::LeaderState; @@ -99,7 +100,7 @@ mod tests; mod member_impl; /// Unified state for each node -mod node_state; +pub(crate) mod node_state; /// The curp state machine pub struct RawCurp { @@ -462,7 +463,7 @@ impl RawCurp { } /// Term, entries -type AppendEntriesSuccess = (u64, Vec>>); +type AppendEntriesSuccess = (u64, Vec>>, BTreeMap); /// Term, index type AppendEntriesFailure = (u64, LogIndex); @@ -665,7 +666,7 @@ impl RawCurp { /// Return `Err(term, hint_index)` if fails #[allow(clippy::needless_pass_by_value)] // TODO: avoid cloning of `entries` #[allow(clippy::too_many_arguments)] // FIXME: reduce the number of arguments - pub(super) fn handle_append_entries( + pub(super) fn handle_append_entries( &self, term: u64, leader_id: ServerId, @@ -673,11 +674,7 @@ impl RawCurp { prev_log_term: u64, entries: Vec>, leader_commit: LogIndex, - spawn_sync: F, - ) -> Result, AppendEntriesFailure> - where - F: Fn(Arc, Arc, InnerConnectApiWrapper), - { + ) -> Result, AppendEntriesFailure> { if entries.is_empty() { trace!( "{} received heartbeat from {}: term({}), commit({}), prev_log_index({}), prev_log_term({})", @@ -717,7 +714,8 @@ impl RawCurp { let (to_persist, truncate_at) = log_w .try_append_entries(entries.clone(), prev_log_index, prev_log_term) .map_err(|_ig| (term, log_w.commit_index + 1))?; - self.append_membership(&entries, truncate_at, leader_commit, spawn_sync) + let new_nodes = self + .append_membership(&entries, truncate_at, leader_commit) .map_err(|err| { error!("append memebrship entires failed: {err}"); (term, log_w.commit_index + 1) @@ -729,7 +727,7 @@ impl RawCurp { self.apply(&mut *log_w); } - Ok((term, to_persist)) + Ok((term, to_persist, new_nodes)) } /// Handle `append_entries` response diff --git a/crates/curp/src/server/raw_curp/node_state.rs b/crates/curp/src/server/raw_curp/node_state.rs index 46072b9da..49ddf7dde 100644 --- a/crates/curp/src/server/raw_curp/node_state.rs 
+++ b/crates/curp/src/server/raw_curp/node_state.rs @@ -235,22 +235,4 @@ impl NodeState { (connect, sync_event, remove_event) } - - /// Clone parts of self - pub(super) fn clone_parts( - &self, - ) -> (NodeStatus, InnerConnectApiWrapper, Arc, Arc) { - let NodeState { - ref status, - ref connect, - ref sync_event, - ref remove_event, - } = *self; - ( - *status, - connect.clone(), - Arc::clone(sync_event), - Arc::clone(remove_event), - ) - } } diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index a07098d20..0c12a7909 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -249,7 +249,7 @@ fn handle_ae_will_calibrate_term() { curp.update_to_term_and_become_follower(&mut *curp.st.write(), 1); let s2_id = curp.get_id_by_name("S2").unwrap(); - let result = curp.handle_append_entries(2, s2_id, 0, 0, vec![], 0, |_, _, _| {}); + let result = curp.handle_append_entries(2, s2_id, 0, 0, vec![], 0); assert!(result.is_ok()); let st_r = curp.st.read(); @@ -266,7 +266,7 @@ fn handle_ae_will_set_leader_id() { curp.update_to_term_and_become_follower(&mut *curp.st.write(), 1); let s2_id = curp.get_id_by_name("S2").unwrap(); - let result = curp.handle_append_entries(1, s2_id, 0, 0, vec![], 0, |_, _, _| {}); + let result = curp.handle_append_entries(1, s2_id, 0, 0, vec![], 0); assert!(result.is_ok()); let st_r = curp.st.read(); @@ -283,7 +283,7 @@ fn handle_ae_will_reject_wrong_term() { curp.update_to_term_and_become_follower(&mut *curp.st.write(), 1); let s2_id = curp.get_id_by_name("S2").unwrap(); - let result = curp.handle_append_entries(0, s2_id, 0, 0, vec![], 0, |_, _, _| {}); + let result = curp.handle_append_entries(0, s2_id, 0, 0, vec![], 0); assert!(result.is_err()); assert_eq!(result.unwrap_err().0, 1); } @@ -308,9 +308,8 @@ fn handle_ae_will_reject_wrong_log() { Arc::new(TestCommand::default()), )], 0, - |_, _, _| {}, ); - assert_eq!(result, Err((1, 1))); + 
assert_eq!(result.unwrap_err(), (1, 1)); } /*************** tests for election **************/ @@ -425,7 +424,6 @@ fn handle_vote_will_reject_outdated_candidate() { Arc::new(TestCommand::default()), )], 0, - |_, _, _| {}, ); assert!(result.is_ok()); curp.st.write().leader_id = None; @@ -672,22 +670,20 @@ fn is_synced_should_return_true_when_followers_caught_up_with_leader() { } #[traced_test] -#[tokio::test] // TODO: use sync context -async fn add_node_should_add_new_node_to_curp() { +#[test] +fn add_node_should_add_new_node_to_curp() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let original_membership = Membership::new(vec![(0..3).collect()], BTreeMap::default()); let membership = Membership::new(vec![(0..4).collect()], BTreeMap::default()); - let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); + let _ignore = curp.update_membership(membership).unwrap(); assert!(curp .effective_membership() .members .iter() .flatten() .any(|id| *id == 3)); - let _ignore = curp - .update_membership(original_membership, |_, _, _| {}) - .unwrap(); + let _ignore = curp.update_membership(original_membership).unwrap(); assert!(!curp .effective_membership() .members @@ -697,15 +693,15 @@ async fn add_node_should_add_new_node_to_curp() { } #[traced_test] -#[tokio::test] // TODO: use sync context -async fn add_learner_node_and_promote_should_success() { +#[test] +fn add_learner_node_and_promote_should_success() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let membership = curp .generate_membership(Some(Change::Add(Node::new(3, NodeMetadata::default())))) .pop() .unwrap(); - let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); + let _ignore = curp.update_membership(membership).unwrap(); assert!(!curp .effective_membership() .members @@ -717,7 +713,7 @@ async fn 
add_learner_node_and_promote_should_success() { .generate_membership(Some(Change::Promote(3))) .pop() .unwrap(); - let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); + let _ignore = curp.update_membership(membership).unwrap(); assert!(curp .effective_membership() .members @@ -744,8 +740,8 @@ fn add_exists_node_should_have_no_effect() { } #[traced_test] -#[tokio::test] // TODO: use sync context -async fn remove_node_should_remove_node_from_curp() { +#[test] +fn remove_node_should_remove_node_from_curp() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; let follower_id = curp.get_id_by_name("S1").unwrap(); @@ -753,7 +749,7 @@ async fn remove_node_should_remove_node_from_curp() { .generate_membership(Some(Change::Demote(follower_id))) .pop() .unwrap(); - let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); + let _ignore = curp.update_membership(membership).unwrap(); assert!(!curp .effective_membership() .members @@ -777,8 +773,8 @@ fn remove_non_exists_node_should_have_no_effect() { } #[traced_test] -#[tokio::test] // TODO: use sync context -async fn follower_append_membership_change() { +#[test] +fn follower_append_membership_change() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let membership = curp @@ -788,25 +784,25 @@ async fn follower_append_membership_change() { curp.update_to_term_and_become_follower(&mut *curp.st.write(), 2); let log = LogEntry::new(1, 1, ProposeId::default(), membership.clone()); - let _ignore = curp.append_membership([log], 1, 0, |_, _, _| {}).unwrap(); + let _ignore = curp.append_membership([log], 1, 0).unwrap(); assert_eq!(curp.effective_membership(), membership); assert_ne!(curp.committed_membership(), membership); let log1 = LogEntry::new(2, 1, ProposeId::default(), EntryData::::Empty); - let _ignore = 
curp.append_membership([log1], 1, 1, |_, _, _| {}).unwrap(); + let _ignore = curp.append_membership([log1], 1, 1).unwrap(); assert_eq!(curp.effective_membership(), membership); assert_eq!(curp.committed_membership(), membership); } #[traced_test] -#[tokio::test] // TODO: use sync context -async fn leader_handle_move_leader() { +#[test] +fn leader_handle_move_leader() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let membership = curp .generate_membership(Some(Change::Add(Node::new(1234, NodeMetadata::default())))) .pop() .unwrap(); - let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); + let _ignore = curp.update_membership(membership).unwrap(); let res = curp.handle_move_leader(1234); assert!(res.is_err()); @@ -840,8 +836,8 @@ fn follower_handle_move_leader() { } #[traced_test] -#[tokio::test] -async fn leader_will_reset_transferee_after_remove_node() { +#[test] +fn leader_will_reset_transferee_after_remove_node() { let task_manager = Arc::new(TaskManager::new()); let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; @@ -854,7 +850,7 @@ async fn leader_will_reset_transferee_after_remove_node() { vec![(0..5).filter(|id| *id != target_id).collect()], BTreeMap::default(), ); - let _ignore = curp.update_membership(membership, |_, _, _| {}).unwrap(); + let _ignore = curp.update_membership(membership).unwrap(); curp.update_transferee(); assert!(curp.get_transferee().is_none()); } From 683cc010b4e82c8d5cf287d4d1cf4f8d79882a72 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 25 Sep 2024 08:30:57 +0800 Subject: [PATCH 203/322] refactor: refine curp membership handling code This change merges common membership handling code for leader and follower roles. 
Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/member_impl.rs | 43 +++++-- crates/curp/src/server/curp_node/mod.rs | 20 ++-- .../curp/src/server/raw_curp/member_impl.rs | 111 ++++++++++-------- crates/curp/src/server/raw_curp/mod.rs | 15 +-- crates/curp/src/server/raw_curp/node_state.rs | 12 +- crates/curp/src/server/raw_curp/tests.rs | 6 +- 6 files changed, 121 insertions(+), 86 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 6eecf6f28..e83e791c2 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -4,7 +4,6 @@ clippy::needless_pass_by_value )] // TODO: remove this after implemented -use std::collections::BTreeMap; use std::collections::HashSet; use std::sync::Arc; @@ -20,6 +19,7 @@ use crate::rpc::ChangeMembershipRequest; use crate::rpc::ChangeMembershipResponse; use crate::rpc::CurpError; use crate::rpc::MembershipChange; +use crate::rpc::ProposeId; use crate::rpc::Redirect; use crate::server::raw_curp::node_state::NodeState; @@ -66,22 +66,45 @@ impl, RC: RoleChange> CurpNode { if configs.is_empty() { return Err(CurpError::invalid_member_change()); } - for config in configs { - let (new_nodes, propose_id) = self.curp.update_membership(config)?; - self.spawn_sync_follower_tasks(new_nodes); - self.curp.wait_propose_ids(Some(propose_id)).await; - } - self.curp.update_role_leader(); + + let propose_ids: Vec<_> = std::iter::repeat_with(|| ProposeId(rand::random(), 0)) + .take(configs.len()) + .collect(); + let entries = propose_ids.clone().into_iter().zip(configs.clone()); + let indices = self.curp.push_membership_logs(entries); + let (new_states, result) = self.with_states_difference(|| { + self.curp + .update_membership_configs(indices.into_iter().zip(configs)) + }); + result?; + self.curp.wait_propose_ids(propose_ids).await; + self.spawn_sync_follower_tasks(new_states); 
+ self.curp.update_role(); self.curp.update_transferee(); Ok(()) } + /// Executes an update operation and captures the difference in node states before and after the update. + pub(super) fn with_states_difference R>( + &self, + update: Update, + ) -> (Vec, R) { + let old = self.curp.clone_node_states(); + let result = update(); + let new = self.curp.clone_node_states(); + let new_states = new + .into_iter() + .filter_map(|(id, state)| (!old.contains_key(&id)).then_some(state)) + .collect(); + + (new_states, result) + } + /// Spawns background follower sync tasks - pub(super) fn spawn_sync_follower_tasks(&self, new_nodes: BTreeMap) { + pub(super) fn spawn_sync_follower_tasks(&self, new_nodes: Vec) { let task_manager = self.curp.task_manager(); - for (connect, sync_event, remove_event) in - new_nodes.into_values().map(NodeState::into_parts) + for (connect, sync_event, remove_event) in new_nodes.into_iter().map(NodeState::into_parts) { task_manager.spawn(TaskName::SyncFollower, |n| { Self::sync_follower_task( diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 118d4ef8b..ffa64d08f 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -412,16 +412,18 @@ impl, RC: RoleChange> CurpNode { req: &AppendEntriesRequest, ) -> Result { let entries = req.entries()?; - let result = self.curp.handle_append_entries( - req.term, - req.leader_id, - req.prev_log_index, - req.prev_log_term, - entries, - req.leader_commit, - ); + let (new_nodes, result) = self.with_states_difference(|| { + self.curp.handle_append_entries( + req.term, + req.leader_id, + req.prev_log_index, + req.prev_log_term, + entries, + req.leader_commit, + ) + }); let resp = match result { - Ok((term, to_persist, new_nodes)) => { + Ok((term, to_persist)) => { self.storage .put_log_entries(&to_persist.iter().map(Arc::as_ref).collect::>())?; self.spawn_sync_follower_tasks(new_nodes); diff --git 
a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 47692f972..d425cb4b9 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -9,7 +9,6 @@ use utils::parking_lot_lock::RwLockMap; use crate::log_entry::EntryData; use crate::log_entry::LogEntry; use crate::member::Membership; -use crate::member::NodeMembershipState; use crate::rpc::connect::InnerConnectApiWrapper; use crate::rpc::inner_connects; use crate::rpc::Change; @@ -21,6 +20,10 @@ use super::node_state::NodeState; use super::RawCurp; use super::Role; +// Lock order: +// - log +// - ms +// - node_states impl RawCurp { /// Generate memberships based on the provided change pub(crate) fn generate_membership(&self, changes: Changes) -> Vec @@ -31,30 +34,41 @@ impl RawCurp { ms_r.cluster().committed().changes(changes) } - /// Updates the membership config - pub(crate) fn update_membership( - &self, - config: Membership, - ) -> Result<(BTreeMap, ProposeId), StorageError> { - // FIXME: define the lock order of log and ms + /// Push membership configs into log + pub(crate) fn push_membership_logs(&self, entries: Entries) -> Vec + where + Entries: IntoIterator, + { let mut log_w = self.log.write(); - let mut ms_w = self.ms.write(); let st_r = self.st.read(); - let propose_id = ProposeId(rand::random(), 0); - let entry = log_w.push(st_r.term, propose_id, config.clone()); - let new_nodes = self.on_membership_update(&config); - ms_w.cluster_mut().append(entry.index, config); - self.ctx - .curp_storage - .put_membership(ms_w.node_id(), ms_w.cluster())?; - - Ok((new_nodes, propose_id)) + let mut indices = Vec::new(); + for (propose_id, config) in entries { + let entry = log_w.push(st_r.term, propose_id, config); + indices.push(entry.index); + } + indices } - /// Updates the role if the node is leader - pub(crate) fn update_role_leader(&self) { - let ms_r = self.ms.read(); - self.update_role(&ms_r); + /// Append 
configs to membership state + /// + /// This method will also performs blocking IO + pub(crate) fn update_membership_configs( + &self, + memberships: Configs, + ) -> Result<(), StorageError> + where + Configs: IntoIterator, + { + let mut ms_w = self.ms.write(); + for (index, config) in memberships { + self.on_membership_update(&config); + ms_w.cluster_mut().append(index, config); + self.ctx + .curp_storage + .put_membership(ms_w.node_id(), ms_w.cluster())?; + } + + Ok(()) } /// Updates the role if the node is leader @@ -67,52 +81,45 @@ impl RawCurp { } } - /// Append membership entries - pub(crate) fn append_membership( - &self, + /// Filter out membership log entries + pub(crate) fn filter_membership_logs( entries: I, - truncate_at: LogIndex, - commit_index: LogIndex, - ) -> Result, StorageError> + ) -> impl Iterator where E: AsRef>, I: IntoIterator, { - let mut new_nodes = BTreeMap::new(); - let mut ms_w = self.ms.write(); - ms_w.cluster_mut().truncate(truncate_at); - let configs = entries.into_iter().filter_map(|entry| { + entries.into_iter().filter_map(|entry| { let entry = entry.as_ref(); if let EntryData::Member(ref m) = entry.entry_data { Some((entry.index, m.clone())) } else { None } - }); - for (index, config) in configs { - new_nodes.append(&mut self.on_membership_update(&config)); - ms_w.cluster_mut().append(index, config); - self.ctx - .curp_storage - .put_membership(ms_w.node_id(), ms_w.cluster())?; - } - ms_w.cluster_mut().commit(commit_index); - - self.update_role(&ms_w); - - Ok(new_nodes) + }) } - /// Updates the commit index - pub(crate) fn membership_commit_to(&self, index: LogIndex) { + /// Updates membership indices + pub(crate) fn update_membership_indices( + &self, + truncate_at: Option, + commit: Option, + ) { let mut ms_w = self.ms.write(); - ms_w.cluster_mut().commit(index); + let _ignore = truncate_at.map(|index| ms_w.cluster_mut().truncate(index)); + let __ignore = commit.map(|index| ms_w.cluster_mut().commit(index)); + } + + /// Clone 
the node states + pub(crate) fn clone_node_states(&self) -> BTreeMap { + self.ctx.node_states.clone_inner() } /// Updates the role of the node based on the current membership state - fn update_role(&self, current: &NodeMembershipState) { + pub(crate) fn update_role(&self) { + let ms = self.ms.read(); let mut st_w = self.st.write(); - if current.is_self_member() { + if ms.is_self_member() { if matches!(st_w.role, Role::Learner) { st_w.role = Role::Follower; } @@ -121,7 +128,7 @@ impl RawCurp { } // updates leader id - if st_w.leader_id.map_or(false, |id| !current.is_member(id)) { + if st_w.leader_id.map_or(false, |id| !ms.is_member(id)) { st_w.leader_id = None; } } @@ -142,7 +149,7 @@ impl RawCurp { /// Actions on membership update /// /// Returns the newly added nodes - fn on_membership_update(&self, membership: &Membership) -> BTreeMap { + fn on_membership_update(&self, membership: &Membership) { let node_ids: BTreeSet<_> = membership.nodes.keys().copied().collect(); let new_connects = self.build_connects(membership); let connect_to = move |ids: &BTreeSet| { @@ -150,6 +157,6 @@ impl RawCurp { .filter_map(|id| new_connects.get(id).cloned()) .collect::>() }; - self.ctx.node_states.update_with(&node_ids, connect_to) + self.ctx.node_states.update_with(&node_ids, connect_to); } } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 292d43eed..1028502a4 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -50,7 +50,6 @@ use utils::task_manager::TaskManager; use utils::ClientTlsConfig; use self::log::Log; -use self::node_state::NodeState; use self::node_state::NodeStates; use self::state::CandidateState; use self::state::LeaderState; @@ -463,7 +462,7 @@ impl RawCurp { } /// Term, entries -type AppendEntriesSuccess = (u64, Vec>>, BTreeMap); +type AppendEntriesSuccess = (u64, Vec>>); /// Term, index type AppendEntriesFailure = (u64, LogIndex); @@ -711,15 +710,17 @@ impl RawCurp { 
// append log entries let mut log_w = self.log.write(); + let membership_configs: Vec<_> = Self::filter_membership_logs(&entries).collect(); let (to_persist, truncate_at) = log_w - .try_append_entries(entries.clone(), prev_log_index, prev_log_term) + .try_append_entries(entries, prev_log_index, prev_log_term) .map_err(|_ig| (term, log_w.commit_index + 1))?; - let new_nodes = self - .append_membership(&entries, truncate_at, leader_commit) + self.update_membership_configs(membership_configs) .map_err(|err| { error!("append memebrship entires failed: {err}"); (term, log_w.commit_index + 1) })?; + self.update_membership_indices(Some(truncate_at), Some(leader_commit)); + self.update_role(); // update commit index let prev_commit_index = log_w.commit_index; log_w.commit_index = min(leader_commit, log_w.last_log_index()); @@ -727,7 +728,7 @@ impl RawCurp { self.apply(&mut *log_w); } - Ok((term, to_persist, new_nodes)) + Ok((term, to_persist)) } /// Handle `append_entries` response @@ -779,7 +780,7 @@ impl RawCurp { let mut log_w = RwLockUpgradableReadGuard::upgrade(log_r); if last_sent_index > log_w.commit_index { log_w.commit_to(last_sent_index); - self.membership_commit_to(last_sent_index); + self.update_membership_indices(None, Some(last_sent_index)); debug!("{} updates commit index to {last_sent_index}", self.id()); self.apply(&mut *log_w); } diff --git a/crates/curp/src/server/raw_curp/node_state.rs b/crates/curp/src/server/raw_curp/node_state.rs index 49ddf7dde..3f64051f6 100644 --- a/crates/curp/src/server/raw_curp/node_state.rs +++ b/crates/curp/src/server/raw_curp/node_state.rs @@ -42,8 +42,7 @@ impl NodeStates { &self, ids: &BTreeSet, mut connect_to: ConnectTo, - ) -> BTreeMap - where + ) where ConnectTo: FnMut(&BTreeSet) -> Connects, Connects: IntoIterator, { @@ -62,11 +61,9 @@ impl NodeStates { .into_iter() .zip(new_connects.into_iter().map(NodeState::new)) .collect(); - states_w.extend(new_states.clone()); + states_w.extend(new_states); info!("added nodes: 
{added:?}, removed nodes: {removed:?}"); - - new_states } /// Update `next_index` for server @@ -168,6 +165,11 @@ impl NodeStates { .zip(states_r.values().map(NodeState::connect).cloned()) .collect() } + + /// Clone the inner map + pub(super) fn clone_inner(&self) -> BTreeMap { + self.states.read().clone() + } } /// The state of a node diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 0c12a7909..1e163c21a 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -708,7 +708,7 @@ fn add_learner_node_and_promote_should_success() { .iter() .flatten() .any(|id| *id == 3)); - curp.membership_commit_to(1); + curp.update_membership_indices(None, Some(1)); let membership = curp .generate_membership(Some(Change::Promote(3))) .pop() @@ -784,11 +784,11 @@ fn follower_append_membership_change() { curp.update_to_term_and_become_follower(&mut *curp.st.write(), 2); let log = LogEntry::new(1, 1, ProposeId::default(), membership.clone()); - let _ignore = curp.append_membership([log], 1, 0).unwrap(); + let _ignore = curp.append_memberships([log], 1, 0).unwrap(); assert_eq!(curp.effective_membership(), membership); assert_ne!(curp.committed_membership(), membership); let log1 = LogEntry::new(2, 1, ProposeId::default(), EntryData::::Empty); - let _ignore = curp.append_membership([log1], 1, 1).unwrap(); + let _ignore = curp.append_memberships([log1], 1, 1).unwrap(); assert_eq!(curp.effective_membership(), membership); assert_eq!(curp.committed_membership(), membership); } From b3ff465f252ab0543efff95af1d5ce6cbc63cb82 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:50:43 +0800 Subject: [PATCH 204/322] test: update test code Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/tests.rs | 36 +++++++++++++++++------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git 
a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 1e163c21a..bd8b2909a 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -676,14 +676,18 @@ fn add_node_should_add_new_node_to_curp() { let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let original_membership = Membership::new(vec![(0..3).collect()], BTreeMap::default()); let membership = Membership::new(vec![(0..4).collect()], BTreeMap::default()); - let _ignore = curp.update_membership(membership).unwrap(); + let _ignore = curp + .update_membership_configs(Some((2, membership))) + .unwrap(); assert!(curp .effective_membership() .members .iter() .flatten() .any(|id| *id == 3)); - let _ignore = curp.update_membership(original_membership).unwrap(); + let _ignore = curp + .update_membership_configs(Some((1, original_membership))) + .unwrap(); assert!(!curp .effective_membership() .members @@ -701,7 +705,9 @@ fn add_learner_node_and_promote_should_success() { .generate_membership(Some(Change::Add(Node::new(3, NodeMetadata::default())))) .pop() .unwrap(); - let _ignore = curp.update_membership(membership).unwrap(); + let _ignore = curp + .update_membership_configs(Some((1, membership))) + .unwrap(); assert!(!curp .effective_membership() .members @@ -713,7 +719,9 @@ fn add_learner_node_and_promote_should_success() { .generate_membership(Some(Change::Promote(3))) .pop() .unwrap(); - let _ignore = curp.update_membership(membership).unwrap(); + let _ignore = curp + .update_membership_configs(Some((2, membership))) + .unwrap(); assert!(curp .effective_membership() .members @@ -749,7 +757,9 @@ fn remove_node_should_remove_node_from_curp() { .generate_membership(Some(Change::Demote(follower_id))) .pop() .unwrap(); - let _ignore = curp.update_membership(membership).unwrap(); + let _ignore = curp + .update_membership_configs(Some((1, membership))) + .unwrap(); assert!(!curp .effective_membership() 
.members @@ -783,12 +793,14 @@ fn follower_append_membership_change() { .unwrap(); curp.update_to_term_and_become_follower(&mut *curp.st.write(), 2); - let log = LogEntry::new(1, 1, ProposeId::default(), membership.clone()); - let _ignore = curp.append_memberships([log], 1, 0).unwrap(); + let log = LogEntry::::new(1, 1, ProposeId::default(), membership.clone()); + let memberships = RawCurp::<_, TestRoleChange>::filter_membership_logs(Some(log)); + let _ignore = curp.update_membership_configs(memberships).unwrap(); assert_eq!(curp.effective_membership(), membership); assert_ne!(curp.committed_membership(), membership); let log1 = LogEntry::new(2, 1, ProposeId::default(), EntryData::::Empty); - let _ignore = curp.append_memberships([log1], 1, 1).unwrap(); + let memberships1 = RawCurp::<_, TestRoleChange>::filter_membership_logs(Some(log1)); + let _ignore = curp.update_membership_configs(memberships1).unwrap(); assert_eq!(curp.effective_membership(), membership); assert_eq!(curp.committed_membership(), membership); } @@ -802,7 +814,9 @@ fn leader_handle_move_leader() { .generate_membership(Some(Change::Add(Node::new(1234, NodeMetadata::default())))) .pop() .unwrap(); - let _ignore = curp.update_membership(membership).unwrap(); + let _ignore = curp + .update_membership_configs(Some((1, membership))) + .unwrap(); let res = curp.handle_move_leader(1234); assert!(res.is_err()); @@ -850,7 +864,9 @@ fn leader_will_reset_transferee_after_remove_node() { vec![(0..5).filter(|id| *id != target_id).collect()], BTreeMap::default(), ); - let _ignore = curp.update_membership(membership).unwrap(); + let _ignore = curp + .update_membership_configs(Some((1, membership))) + .unwrap(); curp.update_transferee(); assert!(curp.get_transferee().is_none()); } From 92bea76d6871b90b538bc5f456d233bb0d53e2b5 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 26 Sep 2024 18:49:11 +0800 Subject: [PATCH 205/322] WIP: refactor membership change implementation 
Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/member_impl.rs | 75 +++++++++++++------ .../curp/src/server/raw_curp/member_impl.rs | 73 ++++++++---------- crates/curp/src/server/raw_curp/node_state.rs | 23 +++--- crates/curp/src/server/raw_curp/tests.rs | 34 ++++----- 4 files changed, 111 insertions(+), 94 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index e83e791c2..07d168d03 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -4,6 +4,7 @@ clippy::needless_pass_by_value )] // TODO: remove this after implemented +use std::collections::BTreeMap; use std::collections::HashSet; use std::sync::Arc; @@ -14,6 +15,8 @@ use utils::task_manager::tasks::TaskName; use super::CurpNode; use crate::member::Membership; +use crate::rpc::connect::InnerConnectApiWrapper; +use crate::rpc::inner_connects; use crate::rpc::Change; use crate::rpc::ChangeMembershipRequest; use crate::rpc::ChangeMembershipResponse; @@ -36,7 +39,13 @@ impl, RC: RoleChange> CurpNode { .map(MembershipChange::into_inner); let changes = Self::ensure_non_overlapping(changes)?; let configs = self.curp.generate_membership(changes); - self.update_and_wait(configs).await?; + if configs.is_empty() { + return Err(CurpError::invalid_member_change()); + } + for config in configs { + let propose_id = self.update_config(config)?; + self.wait_commit(Some(propose_id)).await; + } Ok(ChangeMembershipResponse {}) } @@ -60,29 +69,51 @@ impl, RC: RoleChange> CurpNode { Err(CurpError::InvalidConfig(())) } - /// Updates the membership based on the given change and waits for - /// the proposal to be committed - async fn update_and_wait(&self, configs: Vec) -> Result<(), CurpError> { - if configs.is_empty() { - return Err(CurpError::invalid_member_change()); - } - - let propose_ids: Vec<_> = std::iter::repeat_with(|| 
ProposeId(rand::random(), 0)) - .take(configs.len()) - .collect(); - let entries = propose_ids.clone().into_iter().zip(configs.clone()); - let indices = self.curp.push_membership_logs(entries); - let (new_states, result) = self.with_states_difference(|| { - self.curp - .update_membership_configs(indices.into_iter().zip(configs)) - }); - result?; - self.curp.wait_propose_ids(propose_ids).await; - self.spawn_sync_follower_tasks(new_states); + /// Updates the membership config + fn update_config(&self, config: Membership) -> Result { + let propose_id = ProposeId(rand::random(), 0); + let connects = self.connect_nodes(&config); + let index = self.curp.push_membership_log(propose_id, config.clone()); + self.curp.update_membership_configs(Some((index, config)))?; + let new_states = self.curp.update_node_states(connects); + self.spawn_sync_follower_tasks(new_states.into_values()); self.curp.update_role(); self.curp.update_transferee(); - Ok(()) + Ok(propose_id) + } + + /// Wait the command with the propose id to be committed + async fn wait_commit>(&self, propose_ids: Ids) { + self.curp.wait_propose_ids(propose_ids).await; + } + + ///// Updates the membership based on the given change and waits for + ///// the proposal to be committed + //async fn update_and_wait(&self, config: Membership) -> Result<(), CurpError> { + // let entries = propose_ids.clone().into_iter().zip(configs.clone()); + // let indices = self.curp.push_membership_logs(entries); + // let connects = self.connect_nodes(configs.last().unwrap()); + // self.curp + // .update_membership_configs(indices.into_iter().zip(configs))?; + // let new_states = self.curp.update_node_states(connects); + // + // self.spawn_sync_follower_tasks(new_states.into_values()); + // self.curp.update_role(); + // self.curp.update_transferee(); + // + // Ok(()) + //} + + /// Connect to nodes of the given membership config + fn connect_nodes(&self, config: &Membership) -> BTreeMap { + let nodes = config + .nodes + .iter() + 
.map(|(id, meta)| (*id, meta.peer_urls().to_vec())) + .collect(); + + inner_connects(nodes, self.curp.client_tls_config()).collect() } /// Executes an update operation and captures the difference in node states before and after the update. @@ -102,7 +133,7 @@ impl, RC: RoleChange> CurpNode { } /// Spawns background follower sync tasks - pub(super) fn spawn_sync_follower_tasks(&self, new_nodes: Vec) { + pub(super) fn spawn_sync_follower_tasks(&self, new_nodes: impl IntoIterator) { let task_manager = self.curp.task_manager(); for (connect, sync_event, remove_event) in new_nodes.into_iter().map(NodeState::into_parts) { diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index d425cb4b9..2c0fc5dfe 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -1,5 +1,4 @@ use std::collections::BTreeMap; -use std::collections::BTreeSet; use curp_external_api::cmd::Command; use curp_external_api::role_change::RoleChange; @@ -35,33 +34,28 @@ impl RawCurp { } /// Push membership configs into log - pub(crate) fn push_membership_logs(&self, entries: Entries) -> Vec - where - Entries: IntoIterator, - { + pub(crate) fn push_membership_log( + &self, + propose_id: ProposeId, + config: Membership, + ) -> LogIndex { let mut log_w = self.log.write(); let st_r = self.st.read(); - let mut indices = Vec::new(); - for (propose_id, config) in entries { - let entry = log_w.push(st_r.term, propose_id, config); - indices.push(entry.index); - } - indices + log_w.push(st_r.term, propose_id, config).index } /// Append configs to membership state /// /// This method will also performs blocking IO - pub(crate) fn update_membership_configs( + pub(crate) fn update_membership_configs( &self, - memberships: Configs, + entries: Entries, ) -> Result<(), StorageError> where - Configs: IntoIterator, + Entries: IntoIterator, { let mut ms_w = self.ms.write(); - for (index, config) in memberships 
{ - self.on_membership_update(&config); + for (index, config) in entries { ms_w.cluster_mut().append(index, config); self.ctx .curp_storage @@ -71,6 +65,14 @@ impl RawCurp { Ok(()) } + /// Updates the node states + pub(crate) fn update_node_states( + &self, + connects: BTreeMap, + ) -> BTreeMap { + self.ctx.node_states.update_with(connects) + } + /// Updates the role if the node is leader pub(crate) fn update_transferee(&self) { let Some(transferee) = self.lst.get_transferee() else { @@ -133,30 +135,17 @@ impl RawCurp { } } - /// Creates connections for new membership configuration. - /// - /// Returns a closure can be used to update the existing connections - fn build_connects(&self, config: &Membership) -> BTreeMap { - let nodes = config - .nodes - .iter() - .map(|(id, meta)| (*id, meta.peer_urls().to_vec())) - .collect(); - - inner_connects(nodes, self.client_tls_config()).collect() - } - - /// Actions on membership update - /// - /// Returns the newly added nodes - fn on_membership_update(&self, membership: &Membership) { - let node_ids: BTreeSet<_> = membership.nodes.keys().copied().collect(); - let new_connects = self.build_connects(membership); - let connect_to = move |ids: &BTreeSet| { - ids.iter() - .filter_map(|id| new_connects.get(id).cloned()) - .collect::>() - }; - self.ctx.node_states.update_with(&node_ids, connect_to); - } + ///// Actions on membership update + ///// + ///// Returns the newly added nodes + //fn on_membership_update(&self, membership: &Membership) { + // let node_ids: BTreeSet<_> = membership.nodes.keys().copied().collect(); + // let new_connects = self.build_connects(membership); + // let connect_to = move |ids: &BTreeSet| { + // ids.iter() + // .filter_map(|id| new_connects.get(id).cloned()) + // .collect::>() + // }; + // self.ctx.node_states.update_with(&node_ids, connect_to); + //} } diff --git a/crates/curp/src/server/raw_curp/node_state.rs b/crates/curp/src/server/raw_curp/node_state.rs index 3f64051f6..6523dee09 100644 --- 
a/crates/curp/src/server/raw_curp/node_state.rs +++ b/crates/curp/src/server/raw_curp/node_state.rs @@ -38,32 +38,29 @@ impl NodeStates { /// Updates the node states based on the provided set of ids. /// /// Returns the newly added node states. - pub(super) fn update_with( + pub(super) fn update_with( &self, - ids: &BTreeSet, - mut connect_to: ConnectTo, - ) where - ConnectTo: FnMut(&BTreeSet) -> Connects, - Connects: IntoIterator, - { + connects: BTreeMap, + ) -> BTreeMap { let mut states_w = self.states.write(); + let ids: BTreeSet<_> = connects.keys().copied().collect(); let old_ids: BTreeSet<_> = states_w.keys().copied().collect(); let added: BTreeSet<_> = ids.difference(&old_ids).copied().collect(); - let removed: BTreeSet<_> = old_ids.difference(ids).copied().collect(); + let removed: BTreeSet<_> = old_ids.difference(&ids).copied().collect(); removed .iter() .filter_map(|id| states_w.remove(id)) .for_each(|s| s.notify_remove()); states_w.retain(|id, _| !removed.contains(id)); - let new_connects = connect_to(&added); - let new_states: BTreeMap<_, _> = added - .clone() + let new_states: BTreeMap<_, _> = connects .into_iter() - .zip(new_connects.into_iter().map(NodeState::new)) + .filter_map(|(id, conn)| added.contains(&id).then_some((id, NodeState::new(conn)))) .collect(); - states_w.extend(new_states); + states_w.append(&mut new_states.clone()); info!("added nodes: {added:?}, removed nodes: {removed:?}"); + + new_states } /// Update `next_index` for server diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index bd8b2909a..a6445ca6e 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -786,23 +786,23 @@ fn remove_non_exists_node_should_have_no_effect() { #[test] fn follower_append_membership_change() { let task_manager = Arc::new(TaskManager::new()); - let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; - let membership = curp - 
.generate_membership(Some(Change::Add(Node::new(3, NodeMetadata::default())))) - .pop() - .unwrap(); - - curp.update_to_term_and_become_follower(&mut *curp.st.write(), 2); - let log = LogEntry::::new(1, 1, ProposeId::default(), membership.clone()); - let memberships = RawCurp::<_, TestRoleChange>::filter_membership_logs(Some(log)); - let _ignore = curp.update_membership_configs(memberships).unwrap(); - assert_eq!(curp.effective_membership(), membership); - assert_ne!(curp.committed_membership(), membership); - let log1 = LogEntry::new(2, 1, ProposeId::default(), EntryData::::Empty); - let memberships1 = RawCurp::<_, TestRoleChange>::filter_membership_logs(Some(log1)); - let _ignore = curp.update_membership_configs(memberships1).unwrap(); - assert_eq!(curp.effective_membership(), membership); - assert_eq!(curp.committed_membership(), membership); + let _curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; + //let _membership = curp + // .generate_membership(Some(Change::Add(Node::new(3, NodeMetadata::default())))) + // .pop() + // .unwrap(); + // + //curp.update_to_term_and_become_follower(&mut *curp.st.write(), 2); + //let log = LogEntry::::new(1, 1, ProposeId::default(), membership.clone()); + //let memberships = RawCurp::<_, TestRoleChange>::filter_membership_logs(Some(log)); + //let _ignore = curp.update_membership_configs(memberships).unwrap(); + //assert_eq!(curp.effective_membership(), membership); + //assert_ne!(curp.committed_membership(), membership); + //let log1 = LogEntry::new(2, 1, ProposeId::default(), EntryData::::Empty); + //let memberships1 = RawCurp::<_, TestRoleChange>::filter_membership_logs(Some(log1)); + //let _ignore = curp.update_membership_configs(memberships1).unwrap(); + //assert_eq!(curp.effective_membership(), membership); + //assert_eq!(curp.committed_membership(), membership); } #[traced_test] From dff2a7a9306b54d8b664591fcde31fd5075b6eaf Mon Sep 17 00:00:00 2001 From: bsbds 
<69835502+bsbds@users.noreply.github.com> Date: Thu, 26 Sep 2024 19:29:58 +0800 Subject: [PATCH 206/322] refactor: reimplement `MembershipState` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 82 +++++++++++-------- .../curp/src/server/raw_curp/member_impl.rs | 2 +- 2 files changed, 51 insertions(+), 33 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 78686e62f..f0ec79943 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -60,13 +60,7 @@ impl NodeMembershipState { /// Creates a new `NodeMembershipState` with initial state pub(crate) fn new(info: MembershipInfo) -> Self { let node_id = info.node_id; - let init_ms = info.into_membership(); - let cluster_state = MembershipState { - effective: init_ms.clone(), - index_effective: 0, - // The initial configuration considered as committed - committed: init_ms, - }; + let cluster_state = MembershipState::new(info.into_membership()); Self { node_id, cluster_state, @@ -121,50 +115,74 @@ impl NodeMembershipState { /// Membership state stored in current node #[derive(Serialize, Deserialize, Debug, Default)] pub struct MembershipState { - /// Config that exist in log, but haven't committed - effective: Membership, - /// Index of the effective membership - index_effective: LogIndex, - /// Committed membership config - committed: Membership, + /// Membership entries + entries: Vec, + /// Commit log index + commit_index: LogIndex, } -#[allow(unused)] +#[allow(clippy::unwrap_used)] // `entries` should contains at least one entry impl MembershipState { - /// Append a membership change entry - pub(crate) fn append(&mut self, index: LogIndex, membership: Membership) { - self.index_effective = index; - self.effective = membership; + /// Creates a new `MembershipState` + fn new(initial_membership: Membership) -> Self { + let initial_entry = MembershipEntry::new(0, initial_membership); + Self { + entries: vec![initial_entry], + 
commit_index: 0, + } } - /// Commit a membership index - pub(crate) fn commit(&mut self, at: LogIndex) { - if at >= self.index_effective { - self.committed = self.effective.clone(); - } + /// Append a membership change entry + pub(crate) fn append(&mut self, index: LogIndex, membership: Membership) { + self.entries.push(MembershipEntry::new(index, membership)); } /// Truncate at the give log index pub(crate) fn truncate(&mut self, at: LogIndex) { - if at < self.index_effective { - self.effective = self.committed.clone(); - self.index_effective = at; - } + self.entries.retain(|entry| entry.index <= at); + } + + /// Commit a membership index + pub(crate) fn update_commit(&mut self, index: LogIndex) { + self.commit_index = index; } /// Returns the committed membership pub(crate) fn committed(&self) -> &Membership { - &self.committed + &self + .entries + .iter() + .take_while(|entry| entry.index <= self.commit_index) + .last() + .unwrap() + .membership } /// Returns the effective membership pub(crate) fn effective(&self) -> &Membership { - &self.effective + &self.entries.last().unwrap().membership + } + + #[allow(unused)] // FIXME: performs checking + /// Checks if there's an uncommitted membership change + pub(crate) fn has_uncommitted(&self) -> bool { + self.entries.last().unwrap().index > self.commit_index } +} + +/// A membership log entry, including `Membership` and `LogIndex` +#[derive(Clone, Debug, Default, Serialize, Deserialize, Eq, PartialEq, Hash)] +struct MembershipEntry { + /// The log index of the membership entry + index: LogIndex, + /// Membership + membership: Membership, +} - /// Returns the Some(membership) if there is NO membership change in flight - pub(crate) fn in_flight(&self) -> Option<&Membership> { - (self.effective != self.committed).then_some(&self.committed) +impl MembershipEntry { + /// Creates a new `MembershipEntry` + fn new(index: LogIndex, membership: Membership) -> Self { + Self { index, membership } } } diff --git 
a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 2c0fc5dfe..51b762308 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -109,7 +109,7 @@ impl RawCurp { ) { let mut ms_w = self.ms.write(); let _ignore = truncate_at.map(|index| ms_w.cluster_mut().truncate(index)); - let __ignore = commit.map(|index| ms_w.cluster_mut().commit(index)); + let __ignore = commit.map(|index| ms_w.cluster_mut().update_commit(index)); } /// Clone the node states From cef1c5a9a2643ac884014e66ddf4443f2783bb7b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 27 Sep 2024 18:01:27 +0800 Subject: [PATCH 207/322] refactor: membership impl Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/member_impl.rs | 132 ++++++++++-------- crates/curp/src/server/curp_node/mod.rs | 26 ++-- .../curp/src/server/raw_curp/member_impl.rs | 60 ++------ crates/curp/src/server/raw_curp/mod.rs | 30 +--- crates/curp/src/server/raw_curp/node_state.rs | 5 - crates/curp/src/server/raw_curp/tests.rs | 14 +- 6 files changed, 118 insertions(+), 149 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 07d168d03..17e10d0db 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -11,9 +11,12 @@ use std::sync::Arc; use curp_external_api::cmd::Command; use curp_external_api::cmd::CommandExecutor; use curp_external_api::role_change::RoleChange; +use curp_external_api::LogIndex; use utils::task_manager::tasks::TaskName; use super::CurpNode; +use crate::log_entry::EntryData; +use crate::log_entry::LogEntry; use crate::member::Membership; use crate::rpc::connect::InnerConnectApiWrapper; use crate::rpc::inner_connects; @@ -26,6 +29,7 @@ use crate::rpc::ProposeId; use crate::rpc::Redirect; use 
crate::server::raw_curp::node_state::NodeState; +// Leader methods impl, RC: RoleChange> CurpNode { /// Performs a membership change to the cluster pub(crate) async fn change_membership( @@ -43,13 +47,36 @@ impl, RC: RoleChange> CurpNode { return Err(CurpError::invalid_member_change()); } for config in configs { - let propose_id = self.update_config(config)?; + let propose_id = ProposeId(rand::random(), 0); + let index = self.append_and_persist_membership(propose_id, config.clone()); + self.update_states_with_memberships(Some((index, config)))?; + // Leader also needs to update transferee + self.curp.update_transferee(); self.wait_commit(Some(propose_id)).await; } Ok(ChangeMembershipResponse {}) } + /// Wait the command with the propose id to be committed + async fn wait_commit>(&self, propose_ids: Ids) { + self.curp.wait_propose_ids(propose_ids).await; + } + + /// Append and persist the membership log entry + fn append_and_persist_membership(&self, propose_id: ProposeId, config: Membership) -> LogIndex { + let entries: Vec<_> = self + .curp + .push_log_entries(Some((propose_id, config.clone().into()))) + .collect(); + let to_persist: Vec<_> = entries.iter().map(Arc::as_ref).collect(); + self.curp.persistent_log_entries(&to_persist); + entries + .last() + .unwrap_or_else(|| unreachable!("should contains at least one entry")) + .index + } + /// Ensures there are no overlapping ids fn ensure_non_overlapping(changes: Changes) -> Result, CurpError> where @@ -69,44 +96,63 @@ impl, RC: RoleChange> CurpNode { Err(CurpError::InvalidConfig(())) } + /// Ensures that the current node is the leader + fn ensure_leader(&self) -> Result<(), CurpError> { + let (leader_id, term, is_leader) = self.curp.leader(); + if is_leader { + return Ok(()); + } + Err(CurpError::Redirect(Redirect { + leader_id: leader_id.map(Into::into), + term, + })) + } +} + +// Common methods for both leader and follower +impl, RC: RoleChange> CurpNode { /// Updates the membership config - fn 
update_config(&self, config: Membership) -> Result { - let propose_id = ProposeId(rand::random(), 0); - let connects = self.connect_nodes(&config); - let index = self.curp.push_membership_log(propose_id, config.clone()); - self.curp.update_membership_configs(Some((index, config)))?; + #[allow(clippy::pattern_type_mismatch)] // can't fix + pub(crate) fn update_states_with_memberships(&self, entries: I) -> Result<(), CurpError> + where + I: IntoIterator, + { + let entries: Vec<_> = entries.into_iter().collect(); + let Some((_, last)) = entries.last() else { + return Ok(()); + }; + let connects = self.connect_nodes(last); + self.curp.append_to_membership_states(entries)?; let new_states = self.curp.update_node_states(connects); self.spawn_sync_follower_tasks(new_states.into_values()); self.curp.update_role(); - self.curp.update_transferee(); - Ok(propose_id) + Ok(()) } - /// Wait the command with the propose id to be committed - async fn wait_commit>(&self, propose_ids: Ids) { - self.curp.wait_propose_ids(propose_ids).await; + /// Filter out membership log entries + pub(crate) fn filter_membership_entries( + entries: I, + ) -> impl Iterator + where + E: AsRef>, + I: IntoIterator, + { + entries.into_iter().filter_map(|entry| { + let entry = entry.as_ref(); + if let EntryData::Member(ref m) = entry.entry_data { + Some((entry.index, m.clone())) + } else { + None + } + }) } - ///// Updates the membership based on the given change and waits for - ///// the proposal to be committed - //async fn update_and_wait(&self, config: Membership) -> Result<(), CurpError> { - // let entries = propose_ids.clone().into_iter().zip(configs.clone()); - // let indices = self.curp.push_membership_logs(entries); - // let connects = self.connect_nodes(configs.last().unwrap()); - // self.curp - // .update_membership_configs(indices.into_iter().zip(configs))?; - // let new_states = self.curp.update_node_states(connects); - // - // self.spawn_sync_follower_tasks(new_states.into_values()); - // 
self.curp.update_role(); - // self.curp.update_transferee(); - // - // Ok(()) - //} - /// Connect to nodes of the given membership config - fn connect_nodes(&self, config: &Membership) -> BTreeMap { + pub(crate) fn connect_nodes( + &self, + config: &Membership, + ) -> BTreeMap { let nodes = config .nodes .iter() @@ -116,22 +162,6 @@ impl, RC: RoleChange> CurpNode { inner_connects(nodes, self.curp.client_tls_config()).collect() } - /// Executes an update operation and captures the difference in node states before and after the update. - pub(super) fn with_states_difference R>( - &self, - update: Update, - ) -> (Vec, R) { - let old = self.curp.clone_node_states(); - let result = update(); - let new = self.curp.clone_node_states(); - let new_states = new - .into_iter() - .filter_map(|(id, state)| (!old.contains_key(&id)).then_some(state)) - .collect(); - - (new_states, result) - } - /// Spawns background follower sync tasks pub(super) fn spawn_sync_follower_tasks(&self, new_nodes: impl IntoIterator) { let task_manager = self.curp.task_manager(); @@ -148,16 +178,4 @@ impl, RC: RoleChange> CurpNode { }); } } - - /// Ensures that the current node is the leader - fn ensure_leader(&self) -> Result<(), CurpError> { - let (leader_id, term, is_leader) = self.curp.leader(); - if is_leader { - return Ok(()); - } - Err(CurpError::Redirect(Redirect { - leader_id: leader_id.map(Into::into), - term, - })) - } } diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index ffa64d08f..1724a47bb 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -412,21 +412,23 @@ impl, RC: RoleChange> CurpNode { req: &AppendEntriesRequest, ) -> Result { let entries = req.entries()?; - let (new_nodes, result) = self.with_states_difference(|| { - self.curp.handle_append_entries( - req.term, - req.leader_id, - req.prev_log_index, - req.prev_log_term, - entries, - req.leader_commit, - ) - }); + let leader_commit = 
req.leader_commit; + let membership_entries: Vec<_> = Self::filter_membership_entries(&entries).collect(); + let result = self.curp.handle_append_entries( + req.term, + req.leader_id, + req.prev_log_index, + req.prev_log_term, + entries, + leader_commit, + ); let resp = match result { - Ok((term, to_persist)) => { + Ok((term, truncate_at, to_persist)) => { self.storage .put_log_entries(&to_persist.iter().map(Arc::as_ref).collect::>())?; - self.spawn_sync_follower_tasks(new_nodes); + self.update_states_with_memberships(membership_entries)?; + self.curp + .update_membership_indices(Some(truncate_at), Some(leader_commit)); AppendEntriesResponse::new_accept(term) } Err((term, hint)) => AppendEntriesResponse::new_reject(term, hint), diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 51b762308..5965e593a 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -1,4 +1,5 @@ use std::collections::BTreeMap; +use std::sync::Arc; use curp_external_api::cmd::Command; use curp_external_api::role_change::RoleChange; @@ -9,7 +10,6 @@ use crate::log_entry::EntryData; use crate::log_entry::LogEntry; use crate::member::Membership; use crate::rpc::connect::InnerConnectApiWrapper; -use crate::rpc::inner_connects; use crate::rpc::Change; use crate::rpc::ProposeId; use crate::server::StorageApi; @@ -33,21 +33,28 @@ impl RawCurp { ms_r.cluster().committed().changes(changes) } - /// Push membership configs into log - pub(crate) fn push_membership_log( + /// Push some logs into `Log` + /// TODO: replace `push_logs` + pub(crate) fn push_log_entries( &self, - propose_id: ProposeId, - config: Membership, - ) -> LogIndex { + entries: Logs, + ) -> impl Iterator>> + where + Logs: IntoIterator)>, + { let mut log_w = self.log.write(); let st_r = self.st.read(); - log_w.push(st_r.term, propose_id, config).index + entries + .into_iter() + .map(|(id, entry)| log_w.push(st_r.term, 
id, entry)) + .collect::>() + .into_iter() } /// Append configs to membership state /// /// This method will also performs blocking IO - pub(crate) fn update_membership_configs( + pub(crate) fn append_to_membership_states( &self, entries: Entries, ) -> Result<(), StorageError> @@ -83,24 +90,6 @@ impl RawCurp { } } - /// Filter out membership log entries - pub(crate) fn filter_membership_logs( - entries: I, - ) -> impl Iterator - where - E: AsRef>, - I: IntoIterator, - { - entries.into_iter().filter_map(|entry| { - let entry = entry.as_ref(); - if let EntryData::Member(ref m) = entry.entry_data { - Some((entry.index, m.clone())) - } else { - None - } - }) - } - /// Updates membership indices pub(crate) fn update_membership_indices( &self, @@ -112,11 +101,6 @@ impl RawCurp { let __ignore = commit.map(|index| ms_w.cluster_mut().update_commit(index)); } - /// Clone the node states - pub(crate) fn clone_node_states(&self) -> BTreeMap { - self.ctx.node_states.clone_inner() - } - /// Updates the role of the node based on the current membership state pub(crate) fn update_role(&self) { let ms = self.ms.read(); @@ -134,18 +118,4 @@ impl RawCurp { st_w.leader_id = None; } } - - ///// Actions on membership update - ///// - ///// Returns the newly added nodes - //fn on_membership_update(&self, membership: &Membership) { - // let node_ids: BTreeSet<_> = membership.nodes.keys().copied().collect(); - // let new_connects = self.build_connects(membership); - // let connect_to = move |ids: &BTreeSet| { - // ids.iter() - // .filter_map(|id| new_connects.get(id).cloned()) - // .collect::>() - // }; - // self.ctx.node_states.update_with(&node_ids, connect_to); - //} } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 1028502a4..554a854b2 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -462,7 +462,7 @@ impl RawCurp { } /// Term, entries -type AppendEntriesSuccess = (u64, Vec>>); +type 
AppendEntriesSuccess = (u64, LogIndex, Vec>>); /// Term, index type AppendEntriesFailure = (u64, LogIndex); @@ -551,22 +551,15 @@ impl RawCurp { }); self.entry_process_multi(&mut log_w, &to_process, term); - let log_r = RwLockWriteGuard::downgrade(log_w); - self.persistent_log_entries( - &log_entries.iter().map(Arc::as_ref).collect::>(), - &log_r, - ); + self.persistent_log_entries(&log_entries.iter().map(Arc::as_ref).collect::>()); log_entries } /// Persistent log entries - /// - /// NOTE: A `&Log` is required because we do not want the `Log` structure - /// gets mutated during the persistent #[allow(clippy::panic)] #[allow(dropping_references)] - fn persistent_log_entries(&self, entries: &[&LogEntry], _log: &Log) { + pub(crate) fn persistent_log_entries(&self, entries: &[&LogEntry]) { // We panic when the log persistence fails because it likely indicates an // unrecoverable error. Our WAL implementation does not support rollback // on failure, as a file write syscall is not guaranteed to be atomic. 
@@ -638,8 +631,7 @@ impl RawCurp { debug!("{} gets new log[{}]", self.id(), entry.index); self.entry_process_single(&mut log_w, entry.as_ref(), true, st_r.term); - let log_r = RwLockWriteGuard::downgrade(log_w); - self.persistent_log_entries(&[entry.as_ref()], &log_r); + self.persistent_log_entries(&[entry.as_ref()]); Ok(()) } @@ -710,17 +702,9 @@ impl RawCurp { // append log entries let mut log_w = self.log.write(); - let membership_configs: Vec<_> = Self::filter_membership_logs(&entries).collect(); let (to_persist, truncate_at) = log_w .try_append_entries(entries, prev_log_index, prev_log_term) .map_err(|_ig| (term, log_w.commit_index + 1))?; - self.update_membership_configs(membership_configs) - .map_err(|err| { - error!("append memebrship entires failed: {err}"); - (term, log_w.commit_index + 1) - })?; - self.update_membership_indices(Some(truncate_at), Some(leader_commit)); - self.update_role(); // update commit index let prev_commit_index = log_w.commit_index; log_w.commit_index = min(leader_commit, log_w.last_log_index()); @@ -728,7 +712,7 @@ impl RawCurp { self.apply(&mut *log_w); } - Ok((term, to_persist)) + Ok((term, truncate_at, to_persist)) } /// Handle `append_entries` response @@ -954,7 +938,7 @@ impl RawCurp { // TODO: Generate client id in the same way as client let propose_id = ProposeId(rand::random(), 0); let entry = log_w.push(st_w.term, propose_id, EntryData::Empty); - self.persistent_log_entries(&[&entry], &log_w); + self.persistent_log_entries(&[&entry]); self.recover_from_spec_pools(&st_w, &mut log_w, spec_pools); self.recover_ucp_from_log(&log_w); let last_log_index = log_w.last_log_index(); @@ -1623,7 +1607,7 @@ impl RawCurp { entries.push(entry); } - self.persistent_log_entries(&entries.iter().map(Arc::as_ref).collect::>(), log); + self.persistent_log_entries(&entries.iter().map(Arc::as_ref).collect::>()); } /// Recover the ucp from uncommitted log entries diff --git a/crates/curp/src/server/raw_curp/node_state.rs 
b/crates/curp/src/server/raw_curp/node_state.rs index 6523dee09..bfe6ff456 100644 --- a/crates/curp/src/server/raw_curp/node_state.rs +++ b/crates/curp/src/server/raw_curp/node_state.rs @@ -162,11 +162,6 @@ impl NodeStates { .zip(states_r.values().map(NodeState::connect).cloned()) .collect() } - - /// Clone the inner map - pub(super) fn clone_inner(&self) -> BTreeMap { - self.states.read().clone() - } } /// The state of a node diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index a6445ca6e..16a2459bd 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -677,7 +677,7 @@ fn add_node_should_add_new_node_to_curp() { let original_membership = Membership::new(vec![(0..3).collect()], BTreeMap::default()); let membership = Membership::new(vec![(0..4).collect()], BTreeMap::default()); let _ignore = curp - .update_membership_configs(Some((2, membership))) + .append_to_membership_states(Some((2, membership))) .unwrap(); assert!(curp .effective_membership() @@ -686,7 +686,7 @@ fn add_node_should_add_new_node_to_curp() { .flatten() .any(|id| *id == 3)); let _ignore = curp - .update_membership_configs(Some((1, original_membership))) + .append_to_membership_states(Some((1, original_membership))) .unwrap(); assert!(!curp .effective_membership() @@ -706,7 +706,7 @@ fn add_learner_node_and_promote_should_success() { .pop() .unwrap(); let _ignore = curp - .update_membership_configs(Some((1, membership))) + .append_to_membership_states(Some((1, membership))) .unwrap(); assert!(!curp .effective_membership() @@ -720,7 +720,7 @@ fn add_learner_node_and_promote_should_success() { .pop() .unwrap(); let _ignore = curp - .update_membership_configs(Some((2, membership))) + .append_to_membership_states(Some((2, membership))) .unwrap(); assert!(curp .effective_membership() @@ -758,7 +758,7 @@ fn remove_node_should_remove_node_from_curp() { .pop() .unwrap(); let _ignore = curp - 
.update_membership_configs(Some((1, membership))) + .append_to_membership_states(Some((1, membership))) .unwrap(); assert!(!curp .effective_membership() @@ -815,7 +815,7 @@ fn leader_handle_move_leader() { .pop() .unwrap(); let _ignore = curp - .update_membership_configs(Some((1, membership))) + .append_to_membership_states(Some((1, membership))) .unwrap(); let res = curp.handle_move_leader(1234); @@ -865,7 +865,7 @@ fn leader_will_reset_transferee_after_remove_node() { BTreeMap::default(), ); let _ignore = curp - .update_membership_configs(Some((1, membership))) + .append_to_membership_states(Some((1, membership))) .unwrap(); curp.update_transferee(); assert!(curp.get_transferee().is_none()); From 80c43f3d1ab3918d83eb6354bb4dad4bc095e9c2 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 27 Sep 2024 18:39:00 +0800 Subject: [PATCH 208/322] refactor: log update code Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/member_impl.rs | 16 +-- crates/curp/src/server/curp_node/mod.rs | 33 ++--- crates/curp/src/server/raw_curp/log.rs | 1 + .../curp/src/server/raw_curp/member_impl.rs | 22 --- crates/curp/src/server/raw_curp/mod.rs | 127 ++++++++---------- 5 files changed, 69 insertions(+), 130 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 17e10d0db..ab8bdabd1 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -48,7 +48,7 @@ impl, RC: RoleChange> CurpNode { } for config in configs { let propose_id = ProposeId(rand::random(), 0); - let index = self.append_and_persist_membership(propose_id, config.clone()); + let index = self.curp.push_log_entry(propose_id, config.clone()).index; self.update_states_with_memberships(Some((index, config)))?; // Leader also needs to update transferee self.curp.update_transferee(); @@ -63,20 +63,6 @@ impl, RC: 
RoleChange> CurpNode { self.curp.wait_propose_ids(propose_ids).await; } - /// Append and persist the membership log entry - fn append_and_persist_membership(&self, propose_id: ProposeId, config: Membership) -> LogIndex { - let entries: Vec<_> = self - .curp - .push_log_entries(Some((propose_id, config.clone().into()))) - .collect(); - let to_persist: Vec<_> = entries.iter().map(Arc::as_ref).collect(); - self.curp.persistent_log_entries(&to_persist); - entries - .last() - .unwrap_or_else(|| unreachable!("should contains at least one entry")) - .index - } - /// Ensures there are no overlapping ids fn ensure_non_overlapping(changes: Changes) -> Result, CurpError> where diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 1724a47bb..bc8a632c8 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -83,10 +83,6 @@ pub(super) struct Propose { pub(super) cmd: Arc, /// Propose id pub(super) id: ProposeId, - /// Term the client proposed - /// NOTE: this term should be equal to the cluster's latest term - /// for the propose to be accepted. 
- pub(super) term: u64, /// Tx used for sending the streaming response back to client pub(super) resp_tx: Arc, } @@ -101,7 +97,6 @@ where Ok(Self { cmd, id: req.propose_id(), - term: req.term, resp_tx, }) } @@ -111,20 +106,10 @@ where self.cmd.is_read_only() } - /// Gets response sender - fn response_tx(&self) -> Arc { - Arc::clone(&self.resp_tx) - } - /// Convert self into parts - fn into_parts(self) -> (Arc, ProposeId, u64, Arc) { - let Self { - cmd, - id, - term, - resp_tx, - } = self; - (cmd, id, term, resp_tx) + fn into_parts(self) -> ((ProposeId, Arc), Arc) { + let Self { cmd, id, resp_tx } = self; + ((id, cmd), resp_tx) } } @@ -263,9 +248,7 @@ impl, RC: RoleChange> CurpNode { for propose in proposes { info!("handle read only cmd: {:?}", propose.cmd); // TODO: Disable dedup if the command is read only or commute - let Propose { - cmd, resp_tx, id, .. - } = propose; + let Propose { cmd, id, resp_tx } = propose; // Use default value for the entry as we don't need to put it into curp log let entry = Arc::new(LogEntry::new(0, 0, id, Arc::clone(&cmd))); let wait_conflict = curp.wait_conflicts_synced(cmd); @@ -297,9 +280,11 @@ impl, RC: RoleChange> CurpNode { info!("handle mutative cmd: {:?}, conflict: {conflict}", p.cmd); p.resp_tx.set_conflict(conflict); } - let resp_txs: Vec<_> = proposes.iter().map(Propose::response_tx).collect(); - let logs: Vec<_> = proposes.into_iter().map(Propose::into_parts).collect(); - let entries = curp.push_logs(logs); + let (cmds, resp_txs): (Vec<_>, Vec<_>) = + proposes.into_iter().map(Propose::into_parts).unzip(); + let entries = curp.push_log_entries(cmds); + curp.insert_resp_txs(entries.iter().map(|e| e.index).zip(resp_txs.clone())); + //let entries = curp.push_logs(logs); #[allow(clippy::pattern_type_mismatch)] // Can't be fixed entries .into_iter() diff --git a/crates/curp/src/server/raw_curp/log.rs b/crates/curp/src/server/raw_curp/log.rs index 9ddef76be..6c9f89c56 100644 --- a/crates/curp/src/server/raw_curp/log.rs +++ 
b/crates/curp/src/server/raw_curp/log.rs @@ -112,6 +112,7 @@ pub(super) struct Log { pub(super) commit_index: LogIndex, /// Index of highest log entry sent to after sync. `last_as` should always be less than or equal to `last_exe`. pub(super) last_as: LogIndex, + // FIXME: Speculative execution does not update the state machine, rewrite the snapshot logic /// Index of highest log entry sent to speculatively exe. `last_exe` should always be greater than or equal to `last_as`. pub(super) last_exe: LogIndex, /// Entries to keep in memory diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 5965e593a..03c7bcff9 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -1,17 +1,13 @@ use std::collections::BTreeMap; -use std::sync::Arc; use curp_external_api::cmd::Command; use curp_external_api::role_change::RoleChange; use curp_external_api::LogIndex; use utils::parking_lot_lock::RwLockMap; -use crate::log_entry::EntryData; -use crate::log_entry::LogEntry; use crate::member::Membership; use crate::rpc::connect::InnerConnectApiWrapper; use crate::rpc::Change; -use crate::rpc::ProposeId; use crate::server::StorageApi; use crate::server::StorageError; @@ -33,24 +29,6 @@ impl RawCurp { ms_r.cluster().committed().changes(changes) } - /// Push some logs into `Log` - /// TODO: replace `push_logs` - pub(crate) fn push_log_entries( - &self, - entries: Logs, - ) -> impl Iterator>> - where - Logs: IntoIterator)>, - { - let mut log_w = self.log.write(); - let st_r = self.st.read(); - entries - .into_iter() - .map(|(id, entry)| log_w.push(st_r.term, id, entry)) - .collect::>() - .into_iter() - } - /// Append configs to membership state /// /// This method will also performs blocking IO diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 554a854b2..fe5cc6426 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ 
b/crates/curp/src/server/raw_curp/mod.rs @@ -31,7 +31,6 @@ use opentelemetry::KeyValue; use parking_lot::Mutex; use parking_lot::RwLock; use parking_lot::RwLockUpgradableReadGuard; -use parking_lot::RwLockWriteGuard; use tokio::sync::oneshot; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; @@ -523,37 +522,66 @@ impl RawCurp { conflicts } - /// Handles leader propose - pub(super) fn push_logs( + /// Push one log, called by the leader + /// + /// This method performs the following operations: + /// * Appends the provided entries to the `Log` + /// * (Does I/O) Persists the log entries to the Write-Ahead-Log (WAL) storage + /// * Triggers replication events + #[allow(clippy::unwrap_used)] // contains exactly one entry + pub(super) fn push_log_entry( &self, - proposes: Vec<(Arc, ProposeId, u64, Arc)>, - ) -> Vec>> { - let term = proposes - .first() - .unwrap_or_else(|| unreachable!("no propose in proposes")) - .2; - let mut log_entries = Vec::with_capacity(proposes.len()); - let mut to_process = Vec::with_capacity(proposes.len()); + propose_id: ProposeId, + entry: Entry, + ) -> Arc> + where + Entry: Into>, + { + self.push_log_entries(Some((propose_id, entry))) + .pop() + .unwrap() + } + + /// Push some logs, called by the leader + /// + /// This method performs the following operations: + /// * Appends the provided entries to the `Log` + /// * (Does I/O) Persists the log entries to the Write-Ahead-Log (WAL) storage + /// * Triggers replication events + pub(super) fn push_log_entries(&self, entries: Logs) -> Vec>> + where + Entry: Into>, + Logs: IntoIterator, + { let mut log_w = self.log.write(); - self.ctx.resp_txs.map_lock(|mut tx_map| { - for propose in proposes { - let (cmd, id, _term, resp_tx) = propose; - let entry = log_w.push(term, id, cmd); - let index = entry.index; - let conflict = resp_tx.is_conflict(); - to_process.push((index, conflict)); - log_entries.push(entry); - assert!( - tx_map.insert(index, Arc::clone(&resp_tx)).is_none(), - "Should not 
insert resp_tx twice" - ); - } - }); - self.entry_process_multi(&mut log_w, &to_process, term); + let st_r = self.st.read(); + let entries: Vec<_> = entries + .into_iter() + .map(|(id, entry)| log_w.push(st_r.term, id, entry)) + .collect(); + let entries_ref: Vec<_> = entries.iter().map(Arc::as_ref).collect(); + self.persistent_log_entries(&entries_ref); + self.notify_sync_events(&log_w); - self.persistent_log_entries(&log_entries.iter().map(Arc::as_ref).collect::>()); + for e in &entries { + self.update_index_single_node(&mut log_w, e.index, st_r.term); + } - log_entries + entries + } + + /// Insert into `Context.resp_txs` + pub(super) fn insert_resp_txs(&self, txs: Txs) + where + Txs: IntoIterator)>, + { + let mut tx_map = self.ctx.resp_txs.lock(); + for (index, tx) in txs { + assert!( + tx_map.insert(index, tx).is_none(), + "Should not insert resp_tx twice" + ); + } } /// Persistent log entries @@ -626,12 +654,8 @@ impl RawCurp { return Err(CurpError::LeaderTransfer("leader transferring".to_owned())); } self.deduplicate(propose_id, None)?; - let mut log_w = self.log.write(); - let entry = log_w.push(st_r.term, propose_id, EntryData::Shutdown); - debug!("{} gets new log[{}]", self.id(), entry.index); - self.entry_process_single(&mut log_w, entry.as_ref(), true, st_r.term); - - self.persistent_log_entries(&[entry.as_ref()]); + let index = self.push_log_entry(propose_id, EntryData::Shutdown).index; + debug!("{} gets new log[{index}]", self.id()); Ok(()) } @@ -1683,41 +1707,6 @@ impl RawCurp { } } - /// Entry process shared by `handle_xxx` - #[allow(clippy::pattern_type_mismatch)] // Can't be fixed - fn entry_process_multi(&self, log: &mut Log, entries: &[(u64, bool)], term: u64) { - if let Some(last_no_conflict) = entries - .iter() - .rev() - .find(|(_, conflict)| *conflict) - .map(|(index, _)| *index) - { - log.last_exe = last_no_conflict; - } - let highest_index = entries - .last() - .unwrap_or_else(|| unreachable!("no log in entries")) - .0; - 
self.notify_sync_events(log); - self.update_index_single_node(log, highest_index, term); - } - - /// Entry process shared by `handle_xxx` - fn entry_process_single( - &self, - log_w: &mut RwLockWriteGuard<'_, Log>, - entry: &LogEntry, - conflict: bool, - term: u64, - ) { - let index = entry.index; - if !conflict { - log_w.last_exe = index; - } - self.notify_sync_events(log_w); - self.update_index_single_node(log_w, index, term); - } - /// Process deduplication and acknowledge the `first_incomplete` for this /// client id pub(crate) fn deduplicate( From 3f658b006a73bfa96db1803bb3ecb665c4dec839 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 27 Sep 2024 19:25:45 +0800 Subject: [PATCH 209/322] chore: split leader and follower membership impls in `RawCurp` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/member_impl.rs | 2 +- .../curp/src/server/raw_curp/member_impl.rs | 25 +++++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index ab8bdabd1..429e6331e 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -95,7 +95,7 @@ impl, RC: RoleChange> CurpNode { } } -// Common methods for both leader and follower +// Common methods shared by both leader and followers impl, RC: RoleChange> CurpNode { /// Updates the membership config #[allow(clippy::pattern_type_mismatch)] // can't fix diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 03c7bcff9..486b84b80 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -19,6 +19,8 @@ use super::Role; // - log // - ms // - node_states + +// Leader methods impl RawCurp { /// Generate memberships based on the provided change pub(crate) fn 
generate_membership(&self, changes: Changes) -> Vec @@ -29,6 +31,19 @@ impl RawCurp { ms_r.cluster().committed().changes(changes) } + /// Updates the role if the node is leader + pub(crate) fn update_transferee(&self) { + let Some(transferee) = self.lst.get_transferee() else { + return; + }; + if !self.ms.map_read(|ms| ms.is_member(transferee)) { + self.lst.reset_transferee(); + } + } +} + +// Common methods shared by both leader and followers +impl RawCurp { /// Append configs to membership state /// /// This method will also performs blocking IO @@ -58,16 +73,6 @@ impl RawCurp { self.ctx.node_states.update_with(connects) } - /// Updates the role if the node is leader - pub(crate) fn update_transferee(&self) { - let Some(transferee) = self.lst.get_transferee() else { - return; - }; - if !self.ms.map_read(|ms| ms.is_member(transferee)) { - self.lst.reset_transferee(); - } - } - /// Updates membership indices pub(crate) fn update_membership_indices( &self, From 2155427d406098ec6a6b4b7c19bdd304727f29b8 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 27 Sep 2024 19:32:22 +0800 Subject: [PATCH 210/322] refactor: prevent multiple concurrent membership changes Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 26 ++++++++++++++++--- .../curp/src/server/raw_curp/member_impl.rs | 3 +-- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index f0ec79943..30529b6d2 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -148,6 +148,7 @@ impl MembershipState { } /// Returns the committed membership + #[cfg(test)] pub(crate) fn committed(&self) -> &Membership { &self .entries @@ -158,15 +159,32 @@ impl MembershipState { .membership } + /// Generates a new membership from `Change` + /// + /// Returns an empty `Vec` if there's an on-going membership change + pub(crate) fn changes(&self, changes: Changes) -> Vec + 
where + Changes: IntoIterator, + { + if !self.has_uncommitted() { + return vec![]; + } + self.last().membership.changes(changes) + } + /// Returns the effective membership pub(crate) fn effective(&self) -> &Membership { - &self.entries.last().unwrap().membership + &self.last().membership } - #[allow(unused)] // FIXME: performs checking /// Checks if there's an uncommitted membership change - pub(crate) fn has_uncommitted(&self) -> bool { - self.entries.last().unwrap().index > self.commit_index + fn has_uncommitted(&self) -> bool { + self.last().index > self.commit_index + } + + /// Gets the last entry + fn last(&self) -> &MembershipEntry { + self.entries.last().unwrap() } } diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 486b84b80..704b19fa4 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -27,8 +27,7 @@ impl RawCurp { where Changes: IntoIterator, { - let ms_r = self.ms.read(); - ms_r.cluster().committed().changes(changes) + self.ms.read().cluster().changes(changes) } /// Updates the role if the node is leader From dcf68ec1bb50c834afed33714e6a94f58202d09f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 27 Sep 2024 19:35:50 +0800 Subject: [PATCH 211/322] refactor: remove outdated membership state entries on commit Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 30529b6d2..fb92aa405 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -145,6 +145,7 @@ impl MembershipState { /// Commit a membership index pub(crate) fn update_commit(&mut self, index: LogIndex) { self.commit_index = index; + self.entries.retain(|entry| entry.index >= index); } /// Returns the committed membership From 4b447b678874c16e236f11d2665fc8dc24f400d5 
Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 29 Sep 2024 09:32:19 +0800 Subject: [PATCH 212/322] fix: MembershipState Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index fb92aa405..6c088c717 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -167,7 +167,7 @@ impl MembershipState { where Changes: IntoIterator, { - if !self.has_uncommitted() { + if self.has_uncommitted() { return vec![]; } self.last().membership.changes(changes) From f84ac700e1cb34024c4dc28d7f66b9b3d893b132 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 29 Sep 2024 10:42:13 +0800 Subject: [PATCH 213/322] fix: Membership Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 63 ++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 6c088c717..194c05ada 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -228,13 +228,7 @@ impl Membership { Changes: IntoIterator, { let mut nodes = self.nodes.clone(); - let members = self.members.clone(); - let is_member = |id: &u64| members.iter().any(|s| s.contains(id)); - let mut set = self - .members - .last() - .unwrap_or_else(|| unreachable!("there should be at least one member set")) - .clone(); + let mut target = self.current_member_set().clone(); for change in changes { match change { @@ -250,52 +244,61 @@ impl Membership { } } Change::Promote(id) => { - if !nodes.contains_key(&id) || is_member(&id) { + if self.is_current_member(id) { return vec![]; } - let _ignore = set.insert(id); + let _ignore = target.insert(id); } Change::Demote(id) => { - if !nodes.contains_key(&id) || !is_member(&id) { + if 
!self.is_current_member(id) { return vec![]; } - let _ignore = set.remove(&id); + let _ignore = target.remove(&id); } } } - self.all_coherent(&set) + let all = Self::all_coherent(self.members.clone(), &target); + + all.into_iter() + .map(|members| Self { + members, + nodes: nodes.clone(), + }) + .collect() + } + + /// Gets the current member set + #[allow(clippy::unwrap_used)] // members should never be empty + fn current_member_set(&self) -> &BTreeSet { + self.members.last().unwrap() + } + + /// Returns `true` if the given id exists in the current member set + fn is_current_member(&self, id: u64) -> bool { + self.current_member_set().contains(&id) } /// Generates all coherent membership to reach the target - fn all_coherent(&self, target: &BTreeSet) -> Vec { - iter::successors(Some(self.clone()), |current| { - let next = Self::next_coherent(current, target.clone()); - (current != &next).then_some(next) + fn all_coherent( + current: Vec>, + target: &BTreeSet, + ) -> Vec>> { + iter::successors(Some(current), |curr| { + let next = Joint::new(curr.clone()) + .coherent(target.clone()) + .into_inner(); + (curr != &next).then_some(next) }) .skip(1) .collect() } - /// Generates a new coherent membership from a quorum set - fn next_coherent(ms: &Self, set: BTreeSet) -> Self { - let next = ms.as_joint_owned().coherent(set).into_inner(); - Self { - members: next, - nodes: ms.nodes.clone(), - } - } - /// Converts to `Joint` pub(crate) fn as_joint(&self) -> Joint, &[BTreeSet]> { Joint::new(self.members.as_slice()) } - /// Converts to `Joint` - pub(crate) fn as_joint_owned(&self) -> Joint, Vec>> { - Joint::new(self.members.clone()) - } - /// Gets the addresses of all members pub(crate) fn members(&self) -> impl Iterator { self.nodes.iter().filter_map(|(id, addr)| { From 0f309564abf71b014e9a36c8e4350ff4497c6433 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 29 Sep 2024 21:15:21 +0800 Subject: [PATCH 214/322] fix: membership changes 
calculation Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 41 +++++++++++++++++++-------------------- crates/curp/src/quorum.rs | 22 +++++++++++---------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 194c05ada..a84f95831 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -228,7 +228,7 @@ impl Membership { Changes: IntoIterator, { let mut nodes = self.nodes.clone(); - let mut target = self.current_member_set().clone(); + let mut set = self.current_member_set().clone(); for change in changes { match change { @@ -247,25 +247,23 @@ impl Membership { if self.is_current_member(id) { return vec![]; } - let _ignore = target.insert(id); + let _ignore = set.insert(id); } Change::Demote(id) => { if !self.is_current_member(id) { return vec![]; } - let _ignore = target.remove(&id); + let _ignore = set.remove(&id); } } } - let all = Self::all_coherent(self.members.clone(), &target); + let target = Self { + members: vec![set], + nodes, + }; - all.into_iter() - .map(|members| Self { - members, - nodes: nodes.clone(), - }) - .collect() + Self::all_coherent(self.clone(), &target) } /// Gets the current member set @@ -280,18 +278,19 @@ impl Membership { } /// Generates all coherent membership to reach the target - fn all_coherent( - current: Vec>, - target: &BTreeSet, - ) -> Vec>> { - iter::successors(Some(current), |curr| { - let next = Joint::new(curr.clone()) - .coherent(target.clone()) + fn all_coherent(current: Self, target: &Self) -> Vec { + let next = |curr: &Self| { + let members = Joint::new(curr.members.clone()) + .coherent(Joint::new(target.members.clone())) .into_inner(); - (curr != &next).then_some(next) - }) - .skip(1) - .collect() + let next = Membership { + members, + nodes: target.nodes.clone(), + }; + (*curr != next).then_some(next) + }; + + iter::successors(Some(current), next).skip(1).collect() } /// Converts to 
`Joint` diff --git a/crates/curp/src/quorum.rs b/crates/curp/src/quorum.rs index 01c9f8b6b..c3ed2bc58 100644 --- a/crates/curp/src/quorum.rs +++ b/crates/curp/src/quorum.rs @@ -29,17 +29,19 @@ where QS: PartialEq + Clone, { /// Generates a new coherent joint quorum set - pub(crate) fn coherent(&self, qs: QS) -> Self { - if self.sets.iter().any(|s| *s == qs) { - Self::new(vec![qs]) - } else { - // TODO: select the config where the leader is in - let last = self - .sets - .last() - .unwrap_or_else(|| unreachable!("there should be at least one quorum set")); - Self::new(vec![last.clone(), qs]) + pub(crate) fn coherent(&self, other: Self) -> Self { + if self.is_superset(&other) { + return other; } + + // TODO: select the config where the leader is in + let last = self.sets.last().cloned(); + Self::new(last.into_iter().chain(other.sets).collect()) + } + + /// Checks if `self` is a superset of `other` + fn is_superset(&self, other: &Self) -> bool { + other.sets.iter().all(|s| self.sets.contains(s)) } } From d3491b559aa30afd3df530fe8d6f563b2595f9df Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 29 Sep 2024 22:10:33 +0800 Subject: [PATCH 215/322] refactor: use external committed log index state in `MembershipState` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 28 +++++++++---------- .../curp/src/server/raw_curp/member_impl.rs | 5 ++-- crates/curp/src/server/raw_curp/mod.rs | 4 ++- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index a84f95831..a5a0afc19 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -117,8 +117,6 @@ impl NodeMembershipState { pub struct MembershipState { /// Membership entries entries: Vec, - /// Commit log index - commit_index: LogIndex, } #[allow(clippy::unwrap_used)] // `entries` should contains at least one entry @@ -128,7 +126,6 @@ impl MembershipState { let 
initial_entry = MembershipEntry::new(0, initial_membership); Self { entries: vec![initial_entry], - commit_index: 0, } } @@ -143,18 +140,19 @@ impl MembershipState { } /// Commit a membership index - pub(crate) fn update_commit(&mut self, index: LogIndex) { - self.commit_index = index; - self.entries.retain(|entry| entry.index >= index); + pub(crate) fn commit(&mut self, index: LogIndex) { + if self.last().index >= index { + self.entries.retain(|entry| entry.index >= index); + } } /// Returns the committed membership #[cfg(test)] - pub(crate) fn committed(&self) -> &Membership { + pub(crate) fn committed(&self, commit_index: LogIndex) -> &Membership { &self .entries .iter() - .take_while(|entry| entry.index <= self.commit_index) + .take_while(|entry| entry.index <= commit_index) .last() .unwrap() .membership @@ -163,11 +161,16 @@ impl MembershipState { /// Generates a new membership from `Change` /// /// Returns an empty `Vec` if there's an on-going membership change - pub(crate) fn changes(&self, changes: Changes) -> Vec + pub(crate) fn changes( + &self, + changes: Changes, + commit_index: LogIndex, + ) -> Vec where Changes: IntoIterator, { - if self.has_uncommitted() { + // membership uncommitted, return an empty vec + if self.last().index > commit_index { return vec![]; } self.last().membership.changes(changes) @@ -178,11 +181,6 @@ impl MembershipState { &self.last().membership } - /// Checks if there's an uncommitted membership change - fn has_uncommitted(&self) -> bool { - self.last().index > self.commit_index - } - /// Gets the last entry fn last(&self) -> &MembershipEntry { self.entries.last().unwrap() diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 704b19fa4..eb5a6d233 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -27,7 +27,8 @@ impl RawCurp { where Changes: IntoIterator, { - self.ms.read().cluster().changes(changes) + 
self.log + .map_read(|log| self.ms.read().cluster().changes(changes, log.commit_index)) } /// Updates the role if the node is leader @@ -80,7 +81,7 @@ impl RawCurp { ) { let mut ms_w = self.ms.write(); let _ignore = truncate_at.map(|index| ms_w.cluster_mut().truncate(index)); - let __ignore = commit.map(|index| ms_w.cluster_mut().update_commit(index)); + let __ignore = commit.map(|index| ms_w.cluster_mut().commit(index)); } /// Updates the role of the node based on the current membership state diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index fe5cc6426..7bee6e8a7 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1203,7 +1203,9 @@ impl RawCurp { /// Get the committed membership #[cfg(test)] pub(super) fn committed_membership(&self) -> Membership { - self.ms.read().cluster().committed().clone() + let log_r = self.log.read(); + let ms_r = self.ms.read(); + ms_r.cluster().committed(log_r.commit_index).clone() } /// Get `append_entries` request for `follower_id` that contains the latest From 8ade71ca46605ba284b986ae5c9b77b0eb6a82c6 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 29 Sep 2024 23:07:51 +0800 Subject: [PATCH 216/322] fix: use Option for `truncate_at` instead of using `LogIndex::default` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 2 +- crates/curp/src/server/raw_curp/log.rs | 7 ++----- crates/curp/src/server/raw_curp/mod.rs | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index bc8a632c8..76d989050 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -413,7 +413,7 @@ impl, RC: RoleChange> CurpNode { .put_log_entries(&to_persist.iter().map(Arc::as_ref).collect::>())?; 
self.update_states_with_memberships(membership_entries)?; self.curp - .update_membership_indices(Some(truncate_at), Some(leader_commit)); + .update_membership_indices(truncate_at, Some(leader_commit)); AppendEntriesResponse::new_accept(term) } Err((term, hint)) => AppendEntriesResponse::new_reject(term, hint), diff --git a/crates/curp/src/server/raw_curp/log.rs b/crates/curp/src/server/raw_curp/log.rs index 6c9f89c56..0e7c4b430 100644 --- a/crates/curp/src/server/raw_curp/log.rs +++ b/crates/curp/src/server/raw_curp/log.rs @@ -279,7 +279,7 @@ impl Debug for Log { } /// Type retruned when append success -type AppendSuccess = (Vec>>, LogIndex); +type AppendSuccess = (Vec>>, Option); impl Log { /// Create a new log @@ -362,10 +362,7 @@ impl Log { } // Truncate entries self.truncate(pi); - let truncate_at = self - .entries - .back() - .map_or_else(LogIndex::default, |e| e.inner.index); + let truncate_at = self.entries.back().map(|e| e.inner.index); // Push the remaining entries and record the conf change entries for entry in entries .into_iter() diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 7bee6e8a7..4804dddde 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -461,7 +461,7 @@ impl RawCurp { } /// Term, entries -type AppendEntriesSuccess = (u64, LogIndex, Vec>>); +type AppendEntriesSuccess = (u64, Option, Vec>>); /// Term, index type AppendEntriesFailure = (u64, LogIndex); From 6dc395402c5b74b2c74e32e9c2f846d9058eb8bd Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 29 Sep 2024 23:28:10 +0800 Subject: [PATCH 217/322] refactor: MembershipState::commit Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index a5a0afc19..b5950aeed 100644 --- 
a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -141,9 +141,16 @@ impl MembershipState { /// Commit a membership index pub(crate) fn commit(&mut self, index: LogIndex) { - if self.last().index >= index { - self.entries.retain(|entry| entry.index >= index); - } + let mut keep = self + .entries + .iter() + .enumerate() + // also skips the last entry + .map(|(i, e)| e.index >= index || i.wrapping_add(1) == self.entries.len()) + .collect::>() + .into_iter(); + + self.entries.retain(|_| keep.next().unwrap()); } /// Returns the committed membership From 07ae7ec5ba93fe7967f27ef9474e2910361f530d Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 30 Sep 2024 10:34:49 +0800 Subject: [PATCH 218/322] refactor: merge membership state update methods This change makes the update atomic Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 2 +- .../curp/src/server/curp_node/member_impl.rs | 20 +++----- crates/curp/src/server/curp_node/mod.rs | 13 +++-- .../curp/src/server/raw_curp/member_impl.rs | 50 +++++++++++-------- crates/curp/src/server/raw_curp/mod.rs | 2 +- crates/curp/src/server/raw_curp/tests.rs | 30 +++-------- 6 files changed, 54 insertions(+), 63 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index b5950aeed..8be7423ff 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -113,7 +113,7 @@ impl NodeMembershipState { } /// Membership state stored in current node -#[derive(Serialize, Deserialize, Debug, Default)] +#[derive(Clone, Serialize, Deserialize, Debug, Default)] pub struct MembershipState { /// Membership entries entries: Vec, diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 429e6331e..8a32cf5d8 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -49,7 +49,10 @@ impl, RC: RoleChange> 
CurpNode { for config in configs { let propose_id = ProposeId(rand::random(), 0); let index = self.curp.push_log_entry(propose_id, config.clone()).index; - self.update_states_with_memberships(Some((index, config)))?; + self.update_states_with_membership(&config); + self.curp + .update_membership_state(None, Some((index, config)), None); + self.curp.persistent_membership_state()?; // Leader also needs to update transferee self.curp.update_transferee(); self.wait_commit(Some(propose_id)).await; @@ -98,22 +101,11 @@ impl, RC: RoleChange> CurpNode { // Common methods shared by both leader and followers impl, RC: RoleChange> CurpNode { /// Updates the membership config - #[allow(clippy::pattern_type_mismatch)] // can't fix - pub(crate) fn update_states_with_memberships(&self, entries: I) -> Result<(), CurpError> - where - I: IntoIterator, - { - let entries: Vec<_> = entries.into_iter().collect(); - let Some((_, last)) = entries.last() else { - return Ok(()); - }; - let connects = self.connect_nodes(last); - self.curp.append_to_membership_states(entries)?; + pub(crate) fn update_states_with_membership(&self, membership: &Membership) { + let connects = self.connect_nodes(membership); let new_states = self.curp.update_node_states(connects); self.spawn_sync_follower_tasks(new_states.into_values()); self.curp.update_role(); - - Ok(()) } /// Filter out membership log entries diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 76d989050..235a632a8 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -407,13 +407,20 @@ impl, RC: RoleChange> CurpNode { entries, leader_commit, ); + #[allow(clippy::pattern_type_mismatch)] // can't fix let resp = match result { Ok((term, truncate_at, to_persist)) => { self.storage .put_log_entries(&to_persist.iter().map(Arc::as_ref).collect::>())?; - self.update_states_with_memberships(membership_entries)?; - self.curp - 
.update_membership_indices(truncate_at, Some(leader_commit)); + if let Some((_, config)) = membership_entries.last() { + self.update_states_with_membership(config); + self.curp.update_membership_state( + truncate_at, + membership_entries, + Some(leader_commit), + ); + self.curp.persistent_membership_state()?; + } AppendEntriesResponse::new_accept(term) } Err((term, hint)) => AppendEntriesResponse::new_reject(term, hint), diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index eb5a6d233..2e9744a1e 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -44,25 +44,42 @@ impl RawCurp { // Common methods shared by both leader and followers impl RawCurp { - /// Append configs to membership state + /// Updates the membership state /// - /// This method will also performs blocking IO - pub(crate) fn append_to_membership_states( + /// # Arguments + /// + /// * `truncate` - An optional `LogIndex` up to which the membership log should be truncated. + /// * `append` - An iterator of tuples `(LogIndex, Membership)` to be appended to the membership log. + /// * `commit` - An optional `LogIndex` up to which the membership log should be committed. + pub(crate) fn update_membership_state( &self, - entries: Entries, - ) -> Result<(), StorageError> - where + truncate: Option, + append: Entries, + commit: Option, + ) where Entries: IntoIterator, { let mut ms_w = self.ms.write(); - for (index, config) in entries { + if let Some(index) = truncate { + ms_w.cluster_mut().truncate(index); + } + for (index, config) in append { ms_w.cluster_mut().append(index, config); - self.ctx - .curp_storage - .put_membership(ms_w.node_id(), ms_w.cluster())?; } + if let Some(index) = commit { + ms_w.cluster_mut().commit(index); + } + } - Ok(()) + /// Persists the current membership state to storage. 
+ /// + /// This method should only be called when new entries are appended to the membership state. + pub(crate) fn persistent_membership_state(&self) -> Result<(), StorageError> { + let (node_id, membership_state) = + self.ms.map_read(|ms| (ms.node_id(), ms.cluster().clone())); + self.ctx + .curp_storage + .put_membership(node_id, &membership_state) } /// Updates the node states @@ -73,17 +90,6 @@ impl RawCurp { self.ctx.node_states.update_with(connects) } - /// Updates membership indices - pub(crate) fn update_membership_indices( - &self, - truncate_at: Option, - commit: Option, - ) { - let mut ms_w = self.ms.write(); - let _ignore = truncate_at.map(|index| ms_w.cluster_mut().truncate(index)); - let __ignore = commit.map(|index| ms_w.cluster_mut().commit(index)); - } - /// Updates the role of the node based on the current membership state pub(crate) fn update_role(&self) { let ms = self.ms.read(); diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 4804dddde..b6ec1ecea 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -788,7 +788,7 @@ impl RawCurp { let mut log_w = RwLockUpgradableReadGuard::upgrade(log_r); if last_sent_index > log_w.commit_index { log_w.commit_to(last_sent_index); - self.update_membership_indices(None, Some(last_sent_index)); + self.update_membership_state(None, None, Some(last_sent_index)); debug!("{} updates commit index to {last_sent_index}", self.id()); self.apply(&mut *log_w); } diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 16a2459bd..90f5c4a8c 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -676,18 +676,14 @@ fn add_node_should_add_new_node_to_curp() { let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let original_membership = Membership::new(vec![(0..3).collect()], BTreeMap::default()); let membership 
= Membership::new(vec![(0..4).collect()], BTreeMap::default()); - let _ignore = curp - .append_to_membership_states(Some((2, membership))) - .unwrap(); + let _ignore = curp.update_membership_state(None, Some((2, membership)), None); assert!(curp .effective_membership() .members .iter() .flatten() .any(|id| *id == 3)); - let _ignore = curp - .append_to_membership_states(Some((1, original_membership))) - .unwrap(); + let _ignore = curp.update_membership_state(None, Some((1, original_membership)), None); assert!(!curp .effective_membership() .members @@ -705,23 +701,19 @@ fn add_learner_node_and_promote_should_success() { .generate_membership(Some(Change::Add(Node::new(3, NodeMetadata::default())))) .pop() .unwrap(); - let _ignore = curp - .append_to_membership_states(Some((1, membership))) - .unwrap(); + let _ignore = curp.update_membership_state(None, Some((1, membership)), None); assert!(!curp .effective_membership() .members .iter() .flatten() .any(|id| *id == 3)); - curp.update_membership_indices(None, Some(1)); + curp.update_membership_state(None, None, Some(1)); let membership = curp .generate_membership(Some(Change::Promote(3))) .pop() .unwrap(); - let _ignore = curp - .append_to_membership_states(Some((2, membership))) - .unwrap(); + let _ignore = curp.update_membership_state(None, Some((2, membership)), None); assert!(curp .effective_membership() .members @@ -757,9 +749,7 @@ fn remove_node_should_remove_node_from_curp() { .generate_membership(Some(Change::Demote(follower_id))) .pop() .unwrap(); - let _ignore = curp - .append_to_membership_states(Some((1, membership))) - .unwrap(); + let _ignore = curp.update_membership_state(None, Some((1, membership)), None); assert!(!curp .effective_membership() .members @@ -814,9 +804,7 @@ fn leader_handle_move_leader() { .generate_membership(Some(Change::Add(Node::new(1234, NodeMetadata::default())))) .pop() .unwrap(); - let _ignore = curp - .append_to_membership_states(Some((1, membership))) - .unwrap(); + let _ignore = 
curp.update_membership_state(None, Some((1, membership)), None); let res = curp.handle_move_leader(1234); assert!(res.is_err()); @@ -864,9 +852,7 @@ fn leader_will_reset_transferee_after_remove_node() { vec![(0..5).filter(|id| *id != target_id).collect()], BTreeMap::default(), ); - let _ignore = curp - .append_to_membership_states(Some((1, membership))) - .unwrap(); + let _ignore = curp.update_membership_state(None, Some((1, membership)), None); curp.update_transferee(); assert!(curp.get_transferee().is_none()); } From 95d68bea15bd192d8daaacfeda6241e405169c66 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:12:32 +0800 Subject: [PATCH 219/322] fix: connect nodes should exclude self Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/member_impl.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 8a32cf5d8..5a3645493 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -102,7 +102,7 @@ impl, RC: RoleChange> CurpNode { impl, RC: RoleChange> CurpNode { /// Updates the membership config pub(crate) fn update_states_with_membership(&self, membership: &Membership) { - let connects = self.connect_nodes(membership); + let connects = self.connect_other_nodes(membership); let new_states = self.curp.update_node_states(connects); self.spawn_sync_follower_tasks(new_states.into_values()); self.curp.update_role(); @@ -126,15 +126,17 @@ impl, RC: RoleChange> CurpNode { }) } - /// Connect to nodes of the given membership config - pub(crate) fn connect_nodes( + /// Establishes connections to all nodes specified in the membership configuration, + /// excluding the current node. 
+ pub(crate) fn connect_other_nodes( &self, config: &Membership, ) -> BTreeMap { + let self_id = self.curp.id(); let nodes = config .nodes .iter() - .map(|(id, meta)| (*id, meta.peer_urls().to_vec())) + .filter_map(|(id, meta)| (*id != self_id).then_some((*id, meta.peer_urls().to_vec()))) .collect(); inner_connects(nodes, self.curp.client_tls_config()).collect() From 3acbf221f84c0f44c55899d57f4c605aada45722 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:03:25 +0800 Subject: [PATCH 220/322] refactor: Fetch implementation Fetch should use the new quorum definition instead of the old one. Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 40 +++++++++++++++++++++++++ crates/curp/src/client/fetch.rs | 25 ++++++++-------- crates/curp/src/lib.rs | 32 -------------------- 3 files changed, 52 insertions(+), 45 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 237688fea..386fe75e2 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -160,6 +160,46 @@ impl ClusterStateReady { .collect() } + /// Execute an operation on each node, until a quorum is reached. + /// + /// Parameters: + /// - f: Operation to execute on each follower's connection + /// - filter: Function to filter on each response + /// - quorum: Function to determine if a quorum is reached, use functions in `QuorumSet` trait + /// + /// Returns `true` if then given quorum is reached. 
+ pub(crate) async fn for_each_node_with_quorum, F, Q>( + self, + mut f: impl FnMut(Arc) -> Fut, + mut filter: F, + mut expect_quorum: Q, + ) -> bool + where + F: FnMut(R) -> bool, + Q: FnMut(&dyn QuorumSet>, Vec) -> bool, + { + let qs = self.membership.as_joint(); + + #[allow(clippy::pattern_type_mismatch)] + let futs: FuturesUnordered<_> = self + .member_connects() + .map(|(id, conn)| f(Arc::clone(conn)).map(move |r| (id, r))) + .collect(); + + let mut filtered = + futs.filter_map(|(id, r)| futures::future::ready(filter(r).then_some(id))); + + let mut ids = vec![]; + while let Some(id) = filtered.next().await { + ids.push(id); + if expect_quorum(&qs, ids.clone()) { + return true; + } + } + + false + } + /// Execute an operation on each follower, until a quorum is reached. /// /// Parameters: diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 2f07fb801..8a5f34de7 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -8,7 +8,7 @@ use tracing::warn; use utils::parking_lot_lock::RwLockMap; use crate::{ - quorum, + quorum::{self, QuorumSet}, rpc::{self, connect::ConnectApi, CurpError, FetchMembershipRequest, FetchMembershipResponse}, }; @@ -90,7 +90,7 @@ impl Fetch { connects, resp.clone().into_membership(), ); - if self.fetch_term(&new_state).await { + if self.fetch_term(new_state.clone()).await { return Ok((new_state, resp)); } @@ -98,20 +98,19 @@ impl Fetch { } /// Fetch the term of the cluster. This ensures that the current leader is the latest. 
- async fn fetch_term(&self, state: &ClusterStateReady) -> bool { + async fn fetch_term(&self, state: ClusterStateReady) -> bool { let timeout = self.timeout; let term = state.term(); - let quorum = state.get_quorum(quorum); + let fetch_membership = |c: Arc| async move { + c.fetch_membership(FetchMembershipRequest {}, timeout).await + }; + state - .for_each_server(|c| async move { - c.fetch_membership(FetchMembershipRequest {}, timeout).await - }) - .filter_map(|r| future::ready(r.ok())) - .map(Response::into_inner) - .filter(move |resp| future::ready(resp.term == term)) - .take(quorum) - .count() - .map(move |t| t >= quorum) + .for_each_node_with_quorum( + fetch_membership, + |r| r.is_ok_and(|ok| ok.get_ref().term == term), + |qs, ids| QuorumSet::is_quorum(qs, ids), + ) .await } diff --git a/crates/curp/src/lib.rs b/crates/curp/src/lib.rs index dc715ebbd..598fdd0b5 100644 --- a/crates/curp/src/lib.rs +++ b/crates/curp/src/lib.rs @@ -211,35 +211,3 @@ pub mod member; /// Quorum definitions mod quorum; - -/// Calculate the quorum -#[inline] -#[must_use] -#[allow(clippy::arithmetic_side_effects)] // it's safe -fn quorum(size: usize) -> usize { - size / 2 + 1 -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn quorum_should_work() { - let nodes = vec![1, 2, 3, 4, 5, 6, 7, 10]; - // (quorum, recover_quorum, super_quorum) - let expected_res = vec![ - (1, 1, 1), - (2, 2, 2), - (2, 2, 3), - (3, 2, 3), - (3, 2, 4), - (4, 3, 5), - (4, 3, 6), - ]; - - for (node_cnt, expected) in nodes.into_iter().zip(expected_res.into_iter()) { - assert_eq!(quorum(node_cnt), expected.0); - } - } -} From 87bd37b9733784f0045518e360a351ecc039e19f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 30 Sep 2024 18:20:04 +0800 Subject: [PATCH 221/322] refactor: fetch cluster Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 52 +++-------- crates/curp/src/client/fetch.rs | 118 
+++++++++++++++--------- crates/curp/src/client/retry.rs | 2 +- 3 files changed, 87 insertions(+), 85 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 386fe75e2..ed80f23dd 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -27,6 +27,18 @@ pub(crate) enum ClusterState { Ready(ClusterStateReady), } +impl From for ClusterState { + fn from(init: ClusterStateInit) -> Self { + ClusterState::Init(init) + } +} + +impl From for ClusterState { + fn from(ready: ClusterStateReady) -> Self { + ClusterState::Ready(ready) + } +} + impl ForEachServer for ClusterState { fn for_each_server>( &self, @@ -160,46 +172,6 @@ impl ClusterStateReady { .collect() } - /// Execute an operation on each node, until a quorum is reached. - /// - /// Parameters: - /// - f: Operation to execute on each follower's connection - /// - filter: Function to filter on each response - /// - quorum: Function to determine if a quorum is reached, use functions in `QuorumSet` trait - /// - /// Returns `true` if then given quorum is reached. - pub(crate) async fn for_each_node_with_quorum, F, Q>( - self, - mut f: impl FnMut(Arc) -> Fut, - mut filter: F, - mut expect_quorum: Q, - ) -> bool - where - F: FnMut(R) -> bool, - Q: FnMut(&dyn QuorumSet>, Vec) -> bool, - { - let qs = self.membership.as_joint(); - - #[allow(clippy::pattern_type_mismatch)] - let futs: FuturesUnordered<_> = self - .member_connects() - .map(|(id, conn)| f(Arc::clone(conn)).map(move |r| (id, r))) - .collect(); - - let mut filtered = - futs.filter_map(|(id, r)| futures::future::ready(filter(r).then_some(id))); - - let mut ids = vec![]; - while let Some(id) = filtered.next().await { - ids.push(id); - if expect_quorum(&qs, ids.clone()) { - return true; - } - } - - false - } - /// Execute an operation on each follower, until a quorum is reached. 
/// /// Parameters: diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 8a5f34de7..a5be3abd1 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -1,7 +1,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration}; use curp_external_api::cmd::Command; -use futures::{future, FutureExt, StreamExt}; +use futures::{future, Future, FutureExt, StreamExt}; use parking_lot::RwLock; use tonic::Response; use tracing::warn; @@ -12,7 +12,7 @@ use crate::{ rpc::{self, connect::ConnectApi, CurpError, FetchMembershipRequest, FetchMembershipResponse}, }; -use super::cluster_state::{ClusterState, ClusterStateReady, ForEachServer}; +use super::cluster_state::{ClusterState, ClusterStateInit, ClusterStateReady, ForEachServer}; use super::config::Config; /// Connect to cluster @@ -77,59 +77,88 @@ impl Fetch { /// Fetch cluster and updates the current state pub(crate) async fn fetch_cluster( &self, - state: impl ForEachServer, + state: impl Into, ) -> Result<(ClusterStateReady, FetchMembershipResponse), CurpError> { - let resp = self - .pre_fetch(&state) - .await - .ok_or(CurpError::internal("cluster not available"))?; - let connects = (self.connect_to)(&resp); - let new_state = ClusterStateReady::new( - resp.leader_id, - resp.term, - connects, - resp.clone().into_membership(), - ); - if self.fetch_term(new_state.clone()).await { - return Ok((new_state, resp)); + let state = match state.into() { + ClusterState::Init(state) => { + let resp = self + .fetch_one(&state) + .await + .ok_or(CurpError::internal("cluster not available"))?; + Self::build_cluster_state_from_response(self.connect_to.as_ref(), resp.clone()) + } + ClusterState::Ready(state) => state, + }; + + let (fetch_leader, term_ok) = + tokio::join!(self.fetch_from_leader(&state), self.fetch_term(state)); + + if term_ok { + return fetch_leader; + } + + let (leader_state, resp) = fetch_leader?; + if self.fetch_term(leader_state.clone()).await { + return 
Ok((leader_state, resp)); } Err(CurpError::internal("cluster not available")) } /// Fetch the term of the cluster. This ensures that the current leader is the latest. - async fn fetch_term(&self, state: ClusterStateReady) -> bool { + fn fetch_term(&self, state: ClusterStateReady) -> impl Future { let timeout = self.timeout; let term = state.term(); - let fetch_membership = |c: Arc| async move { + let fetch_membership = move |c: Arc| async move { c.fetch_membership(FetchMembershipRequest {}, timeout).await }; - state - .for_each_node_with_quorum( - fetch_membership, - |r| r.is_ok_and(|ok| ok.get_ref().term == term), - |qs, ids| QuorumSet::is_quorum(qs, ids), - ) - .await + state.for_each_follower_with_quorum( + fetch_membership, + move |r| r.is_ok_and(|ok| ok.get_ref().term == term), + |qs, ids| QuorumSet::is_quorum(qs, ids), + ) + } + + /// Fetch cluster state from leader + fn fetch_from_leader( + &self, + state: &ClusterStateReady, + ) -> impl Future> { + let timeout = self.timeout; + let connect_to = self.connect_to.clone_box(); + state.map_leader(|c| async move { + let result = c.fetch_membership(FetchMembershipRequest {}, timeout).await; + result.map(|resp| { + let resp = resp.into_inner(); + let fetch_state = + Self::build_cluster_state_from_response(connect_to.as_ref(), resp.clone()); + (fetch_state, resp) + }) + }) } - /// Prefetch, send fetch cluster request to the cluster and get the - /// config with the greatest quorum. 
- async fn pre_fetch(&self, state: &impl ForEachServer) -> Option { + /// Sends fetch membership request to the cluster, and returns the first response + async fn fetch_one(&self, state: &ClusterStateInit) -> Option { let timeout = self.timeout; - let requests = state.for_each_server(|c| async move { + let request_futs = state.for_each_server(|c| async move { c.fetch_membership(FetchMembershipRequest {}, timeout).await }); - let responses: Vec<_> = requests - .filter_map(|r| future::ready(r.ok())) + + request_futs + .filter_map(|req| future::ready(req.ok())) + .next() + .await .map(Response::into_inner) - .collect() - .await; - responses - .into_iter() - .filter(|resp| !resp.members.is_empty()) - .max_by(|x, y| x.term.cmp(&y.term)) + } + + /// Build `ClusterStateReady` from `FetchMembershipResponse` + fn build_cluster_state_from_response( + connect_to: &dyn ConnectToCluster, + resp: FetchMembershipResponse, + ) -> ClusterStateReady { + let connects = (connect_to)(&resp); + ClusterStateReady::new(resp.leader_id, resp.term, connects, resp.into_membership()) } } @@ -149,7 +178,11 @@ mod test { use tracing_test::traced_test; use crate::{ - client::{cluster_state::ForEachServer, config::Config, tests::init_mocked_connects}, + client::{ + cluster_state::{ClusterState, ClusterStateInit, ForEachServer}, + config::Config, + tests::init_mocked_connects, + }, rpc::{ self, connect::ConnectApi, CurpError, FetchMembershipResponse, Member, Node, NodeMetadata, @@ -158,12 +191,9 @@ mod test { use super::Fetch; - impl ForEachServer for HashMap> { - fn for_each_server>( - &self, - f: impl FnMut(Arc) -> F, - ) -> FuturesUnordered { - self.values().cloned().map(f).collect() + impl From>> for ClusterState { + fn from(connects: HashMap>) -> Self { + ClusterState::Init(ClusterStateInit::new(connects.into_values().collect())) } } diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 653f7204a..10ad19bf6 100644 --- a/crates/curp/src/client/retry.rs +++ 
b/crates/curp/src/client/retry.rs @@ -427,7 +427,7 @@ where linearizable: bool, ) -> Result { self.retry::<_, _>(|client, ctx| async move { - let (_, resp) = self.fetch.fetch_cluster(ctx.cluster_state()).await?; + let (_, resp) = self.fetch.fetch_cluster(ClusterState::Ready(ctx.cluster_state())).await?; Ok(resp) }) .await From 6efdd5c144aef7211d3cbab341dc6c3daa5bcda1 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 09:22:45 +0800 Subject: [PATCH 222/322] refactor: check cluster version during propose Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 13 ++++++++++++- crates/curp/src/client/unary/propose_impl.rs | 3 ++- crates/curp/src/member.rs | 15 +++++++++++++++ crates/curp/src/server/curp_node/mod.rs | 1 + crates/curp/src/server/raw_curp/mod.rs | 10 ++++++++++ 5 files changed, 40 insertions(+), 2 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index ed80f23dd..973b719cb 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -1,4 +1,8 @@ -use std::{collections::HashMap, sync::Arc}; +use std::{ + collections::{hash_map::DefaultHasher, HashMap}, + hash::{Hash, Hasher}, + sync::Arc, +}; use futures::{stream::FuturesUnordered, Future, FutureExt, StreamExt}; @@ -239,4 +243,11 @@ impl ClusterStateReady { pub(crate) fn leader_id(&self) -> u64 { self.leader } + + /// Calculates the cluster version + /// + /// The cluster version is a hash of the current `Membership` + pub(crate) fn cluster_version(&self) -> u64 { + self.membership.version() + } } diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs index bdcf144b4..0831f117d 100644 --- a/crates/curp/src/client/unary/propose_impl.rs +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -136,10 +136,11 @@ impl Unary { ctx: &Context, ) -> EventStream<'_, C> { 
let term = ctx.cluster_state().term(); + let cluster_version = ctx.cluster_state().cluster_version(); let propose_req = ProposeRequest::new::( ctx.propose_id(), cmd, - 0, + cluster_version, term, !use_fast_path, ctx.first_incomplete(), diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 8be7423ff..1b295fbf6 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -188,6 +188,13 @@ impl MembershipState { &self.last().membership } + /// Calculates the cluster version + /// + /// The cluster version is a hash of the effective `Membership` + pub(crate) fn cluster_version(&self) -> u64 { + self.effective().version() + } + /// Gets the last entry fn last(&self) -> &MembershipEntry { self.entries.last().unwrap() @@ -317,6 +324,14 @@ impl Membership { pub(crate) fn contains_member(&self, node_id: u64) -> bool { self.members.iter().any(|s| s.contains(&node_id)) } + + /// Calculates the version of this membership + pub(crate) fn version(&self) -> u64 { + // TODO: handle conflict? + let mut hasher = DefaultHasher::new(); + self.hash(&mut hasher); + hasher.finish() + } } /// Trait for types that can provide a cluster ID. 
diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 235a632a8..76a2de593 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -149,6 +149,7 @@ impl, RC: RoleChange> CurpNode { } self.curp.check_leader_transfer()?; self.curp.check_term(req.term)?; + self.curp.check_cluster_version(req.cluster_version)?; if req.slow_path { resp_tx.set_conflict(true); diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index b6ec1ecea..97fa73129 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1148,6 +1148,16 @@ impl RawCurp { let ms_r = self.ms.read(); self.become_candidate(&mut st_w, &mut cst_l, log_r, &ms_r) } + + /// Returns `CurpError::WrongClusterVersion` if the given cluster version does not match the + /// effective membership version of the current node. + pub(super) fn check_cluster_version(&self, cluster_version: u64) -> Result<(), CurpError> { + if self.ms.read().cluster().cluster_version() == cluster_version { + return Ok(()); + } + + Err(CurpError::wrong_cluster_version()) + } } /// Other small public interface From df73b7815466953eef056bea7312eef56397c47a Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 10:20:37 +0800 Subject: [PATCH 223/322] fix: client record conflict Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/unary/propose_impl.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs index 0831f117d..159f1d69f 100644 --- a/crates/curp/src/client/unary/propose_impl.rs +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -195,7 +195,7 @@ impl Unary { |res| res.is_ok_and(|resp| !resp.get_ref().conflict), |qs, ids| QuorumSet::is_super_quorum(qs, ids), ) - .map(move |conflict| 
ProposeEvent::Record { conflict }) + .map(move |ok| ProposeEvent::Record { conflict: !ok }) .map(Ok) .into_stream(); From 8592d841eb215d9a9c210a50cdf7c01496c75494 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 10:27:07 +0800 Subject: [PATCH 224/322] fix: client fetch membership should fetch largest term Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/fetch.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index a5be3abd1..b66335df8 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -141,15 +141,18 @@ impl Fetch { /// Sends fetch membership request to the cluster, and returns the first response async fn fetch_one(&self, state: &ClusterStateInit) -> Option { let timeout = self.timeout; - let request_futs = state.for_each_server(|c| async move { - c.fetch_membership(FetchMembershipRequest {}, timeout).await - }); + let resps: Vec<_> = state + .for_each_server(|c| async move { + c.fetch_membership(FetchMembershipRequest {}, timeout).await + }) + .collect() + .await; - request_futs - .filter_map(|req| future::ready(req.ok())) - .next() - .await + resps + .into_iter() + .filter_map(Result::ok) .map(Response::into_inner) + .max_by(|x, y| x.term.cmp(&y.term)) } /// Build `ClusterStateReady` from `FetchMembershipResponse` From 5401535217a3f9ba118feed41801354b07bf24f2 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 11:43:58 +0800 Subject: [PATCH 225/322] refactor: implement lazy connect in rpc::connect The tonic::transport::Channel::balance_channel requires an async runtime. This makes the connection lazily buildable for use in a synchronous context. 
Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/rpc/connect/lazy.rs | 257 ++++++++++++++++++++++++++++ crates/curp/src/rpc/connect/mod.rs | 7 +- 2 files changed, 262 insertions(+), 2 deletions(-) create mode 100644 crates/curp/src/rpc/connect/lazy.rs diff --git a/crates/curp/src/rpc/connect/lazy.rs b/crates/curp/src/rpc/connect/lazy.rs new file mode 100644 index 000000000..5ca26bd98 --- /dev/null +++ b/crates/curp/src/rpc/connect/lazy.rs @@ -0,0 +1,257 @@ +use std::time::Duration; + +use async_trait::async_trait; +use futures::Stream; +use tonic::transport::Channel; +#[cfg(not(madsim))] +use tonic::transport::ClientTlsConfig; +#[cfg(madsim)] +use utils::ClientTlsConfig; + +use crate::{ + rpc::{ + proto::{ + commandpb::protocol_client::ProtocolClient, + inner_messagepb::inner_protocol_client::InnerProtocolClient, + }, + AppendEntriesRequest, AppendEntriesResponse, ChangeMembershipRequest, + ChangeMembershipResponse, CurpError, FetchMembershipRequest, FetchMembershipResponse, + FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotResponse, MoveLeaderRequest, + MoveLeaderResponse, OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, + RecordResponse, ShutdownRequest, ShutdownResponse, VoteRequest, VoteResponse, + }, + snapshot::Snapshot, +}; + +use super::{connect_to, Connect, ConnectApi, FromTonicChannel, InnerConnectApi}; + +/// A structure that lazily establishes a connection to a server. +pub(super) struct ConnectLazy { + // Configs + /// The node id + id: u64, + /// Node addrs + addrs: Vec, + /// The TLS config + tls_config: Option, + + /// The connection + inner: tokio::sync::Mutex>>, +} + +impl ConnectLazy { + /// Lazily establishes a connection to the specified server. 
+ pub(super) fn connect( + id: u64, + addrs: Vec, + tls_config: Option, + ) -> Self { + Self { + id, + addrs, + tls_config, + inner: tokio::sync::Mutex::new(None), + } + } +} + +impl ConnectLazy +where + C: FromTonicChannel, +{ + /// Establishes a connection if it does not already exist. + fn connect_inner(&self, inner: &mut Option>) { + if inner.is_none() { + let connect = connect_to(self.id, self.addrs.clone(), self.tls_config.clone()); + *inner = Some(connect); + } + } +} + +#[allow(clippy::unwrap_used)] +#[async_trait] +impl InnerConnectApi for ConnectLazy> { + fn id(&self) -> u64 { + self.id + } + + async fn update_addrs(&self, addrs: Vec) -> Result<(), tonic::transport::Error> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner.as_ref().unwrap().update_addrs(addrs).await + } + + async fn append_entries( + &self, + request: AppendEntriesRequest, + timeout: Duration, + ) -> Result, tonic::Status> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner + .as_ref() + .unwrap() + .append_entries(request, timeout) + .await + } + + async fn vote( + &self, + request: VoteRequest, + timeout: Duration, + ) -> Result, tonic::Status> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner.as_ref().unwrap().vote(request, timeout).await + } + + async fn install_snapshot( + &self, + term: u64, + leader_id: u64, + snapshot: Snapshot, + ) -> Result, tonic::Status> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner + .as_ref() + .unwrap() + .install_snapshot(term, leader_id, snapshot) + .await + } + + async fn trigger_shutdown(&self) -> Result<(), tonic::Status> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner.as_ref().unwrap().trigger_shutdown().await + } + + async fn try_become_leader_now(&self, timeout: Duration) -> Result<(), tonic::Status> { + let mut inner = self.inner.lock().await; + 
self.connect_inner(&mut inner); + inner.as_ref().unwrap().try_become_leader_now(timeout).await + } +} + +#[allow(clippy::unwrap_used)] +#[async_trait] +impl ConnectApi for ConnectLazy> { + fn id(&self) -> u64 { + self.id + } + + async fn update_addrs(&self, addrs: Vec) -> Result<(), tonic::transport::Error> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner.as_ref().unwrap().update_addrs(addrs).await + } + + async fn propose_stream( + &self, + request: ProposeRequest, + token: Option, + timeout: Duration, + ) -> Result< + tonic::Response> + Send>>, + CurpError, + > { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner + .as_ref() + .unwrap() + .propose_stream(request, token, timeout) + .await + } + + async fn record( + &self, + request: RecordRequest, + timeout: Duration, + ) -> Result, CurpError> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner.as_ref().unwrap().record(request, timeout).await + } + + async fn read_index( + &self, + timeout: Duration, + ) -> Result, CurpError> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner.as_ref().unwrap().read_index(timeout).await + } + + async fn shutdown( + &self, + request: ShutdownRequest, + timeout: Duration, + ) -> Result, CurpError> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner.as_ref().unwrap().shutdown(request, timeout).await + } + + async fn fetch_read_state( + &self, + request: FetchReadStateRequest, + timeout: Duration, + ) -> Result, CurpError> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner + .as_ref() + .unwrap() + .fetch_read_state(request, timeout) + .await + } + + async fn move_leader( + &self, + request: MoveLeaderRequest, + timeout: Duration, + ) -> Result, CurpError> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + 
inner.as_ref().unwrap().move_leader(request, timeout).await + } + + async fn lease_keep_alive(&self, client_id: u64, interval: Duration) -> Result { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner + .as_ref() + .unwrap() + .lease_keep_alive(client_id, interval) + .await + } + + async fn fetch_membership( + &self, + request: FetchMembershipRequest, + timeout: Duration, + ) -> Result, CurpError> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner + .as_ref() + .unwrap() + .fetch_membership(request, timeout) + .await + } + + async fn change_membership( + &self, + request: ChangeMembershipRequest, + timeout: Duration, + ) -> Result, CurpError> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner + .as_ref() + .unwrap() + .change_membership(request, timeout) + .await + } +} diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index af1c7a528..48258ceed 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -1,3 +1,6 @@ +/// Lazy connect implementation +mod lazy; + use std::{ collections::{HashMap, HashSet}, fmt::{Debug, Formatter}, @@ -148,9 +151,9 @@ pub(crate) fn inner_connects( members.into_iter().map(move |(id, addrs)| { ( id, - InnerConnectApiWrapper::new_from_arc(Arc::new(connect_to::< + InnerConnectApiWrapper::new_from_arc(Arc::new(lazy::ConnectLazy::< InnerProtocolClient, - >( + >::connect( id, addrs, tls_config.clone() ))), ) From d4b53fcaec28198958d280fe4710fba220007c19 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 15:22:01 +0800 Subject: [PATCH 226/322] test: return fetch membership response in MockStreamConnectApi Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/keep_alive.rs | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git 
a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index d7ea6cfb6..5b8e5f934 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -157,14 +157,17 @@ mod tests { rpc::{ connect::ConnectApi, ChangeMembershipRequest, ChangeMembershipResponse, FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, - FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, NodeMetadata, - OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, RecordResponse, - ShutdownRequest, ShutdownResponse, + FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, Node, NodeMetadata, + OpResponse, ProposeRequest, QuorumSet, ReadIndexResponse, RecordRequest, + RecordResponse, ShutdownRequest, ShutdownResponse, }, }; struct MockedStreamConnectApi { id: u64, + leader_id: u64, + term: u64, + size: usize, lease_keep_alive_handle: Box BoxFuture<'static, Result> + Send + Sync + 'static>, } @@ -252,7 +255,21 @@ mod tests { _request: FetchMembershipRequest, _timeout: Duration, ) -> Result, CurpError> { - unreachable!("please use MockedConnectApi") + let ids = (0..self.size as u64); + let qs = QuorumSet { + set: ids.clone().collect(), + }; + let nodes = ids + .map(|node_id| Node::new(node_id, NodeMetadata::default())) + .collect(); + let resp = FetchMembershipResponse { + term: self.term, + leader_id: self.leader_id, + members: vec![qs], + nodes, + }; + + Ok(tonic::Response::new(resp)) } async fn change_membership( @@ -285,6 +302,9 @@ mod tests { (0..size) .map(|id| MockedStreamConnectApi { id: id as u64, + leader_id: leader_idx as u64, + term: leader_term, + size, lease_keep_alive_handle: if id == leader_idx { Box::new(keep_alive_handle.take().unwrap()) } else { From 4306127dc3d2777aa3ee03b36755cc8b7a1be15b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 15:27:36 +0800 Subject: [PATCH 227/322] test: commit log index in membership tests Signed-off-by: 
bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/tests.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 90f5c4a8c..130f04f70 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -708,6 +708,7 @@ fn add_learner_node_and_promote_should_success() { .iter() .flatten() .any(|id| *id == 3)); + curp.log.write().commit_to(1); curp.update_membership_state(None, None, Some(1)); let membership = curp .generate_membership(Some(Change::Promote(3))) From c9a4065a35e09f87fa6dcb03f171308362b89397 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 16:04:38 +0800 Subject: [PATCH 228/322] test: do not update follower state in leader_handle_move_leader Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/tests.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 130f04f70..0df65bea2 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -814,9 +814,6 @@ fn leader_handle_move_leader() { assert!(res.is_err()); let target_id = curp.get_id_by_name("S1").unwrap(); - let _ignore = curp - .handle_append_entries_resp(target_id, Some(1), 1, true, 1) - .unwrap(); let res = curp.handle_move_leader(target_id); // need to send try become leader now after handle_move_leader assert!(res.is_ok_and(|b| b)); From 17ad1e2522b826d91a32e882bd6bc7085ce0cb9c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 16:52:05 +0800 Subject: [PATCH 229/322] refactor:update client membership state when client fetch membership * The ChangeMembership RPC will return the latest membership, the client should update its state after calling ChangeMembership * Merge 
response type of FetchMembership and ChangeMembership RPCs into MembershipResponse Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/common | 2 +- crates/curp/src/client/connect.rs | 9 ++-- crates/curp/src/client/fetch.rs | 47 ++++++++++--------- crates/curp/src/client/keep_alive.rs | 15 +++--- crates/curp/src/client/mod.rs | 4 +- crates/curp/src/client/retry.rs | 18 +++++-- crates/curp/src/client/tests.rs | 8 ++-- crates/curp/src/client/unary/mod.rs | 11 +++-- crates/curp/src/rpc/connect/lazy.rs | 14 +++--- crates/curp/src/rpc/connect/mod.rs | 16 +++---- crates/curp/src/rpc/mod.rs | 5 +- crates/curp/src/rpc/reconnect.rs | 12 ++--- .../curp/src/server/curp_node/member_impl.rs | 36 ++++++++++++-- crates/curp/src/server/curp_node/mod.rs | 36 +++----------- crates/curp/src/server/mod.rs | 7 ++- crates/curp/tests/it/common/curp_group.rs | 6 +-- crates/xline/src/server/auth_wrapper.rs | 13 +++-- 17 files changed, 134 insertions(+), 125 deletions(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 81707530d..047ee94f2 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 81707530d471e77c0e57187d11ee0f0874a73177 +Subproject commit 047ee94f28cb92e7df05dd5c907d9f06620a09ba diff --git a/crates/curp/src/client/connect.rs b/crates/curp/src/client/connect.rs index aa3748a5a..a59301228 100644 --- a/crates/curp/src/client/connect.rs +++ b/crates/curp/src/client/connect.rs @@ -3,7 +3,7 @@ use curp_external_api::cmd::Command; use crate::{ members::ServerId, - rpc::{Change, FetchMembershipResponse, ReadState}, + rpc::{Change, MembershipResponse, ReadState}, }; use super::retry::Context; @@ -46,10 +46,7 @@ pub trait ClientApi { /// know who the leader is.) 
/// /// Note: The fetched cluster may still be outdated if `linearizable` is false - async fn fetch_cluster( - &self, - linearizable: bool, - ) -> Result; + async fn fetch_cluster(&self, linearizable: bool) -> Result; /// Fetch leader id #[inline] @@ -100,5 +97,5 @@ pub(crate) trait RepeatableClientApi { &self, changes: Vec, ctx: Context, - ) -> Result<(), Self::Error>; + ) -> Result; } diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index b66335df8..45e56cf1f 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -9,7 +9,7 @@ use utils::parking_lot_lock::RwLockMap; use crate::{ quorum::{self, QuorumSet}, - rpc::{self, connect::ConnectApi, CurpError, FetchMembershipRequest, FetchMembershipResponse}, + rpc::{self, connect::ConnectApi, CurpError, FetchMembershipRequest, MembershipResponse}, }; use super::cluster_state::{ClusterState, ClusterStateInit, ClusterStateReady, ForEachServer}; @@ -20,7 +20,7 @@ use super::config::Config; /// This is used to build a boxed closure that handles the `FetchClusterResponse` and returns /// new connections. 
pub(super) trait ConnectToCluster: - Fn(&FetchMembershipResponse) -> HashMap> + Send + Sync + 'static + Fn(&MembershipResponse) -> HashMap> + Send + Sync + 'static { /// Clone the value fn clone_box(&self) -> Box; @@ -28,11 +28,7 @@ pub(super) trait ConnectToCluster: impl ConnectToCluster for T where - T: Fn(&FetchMembershipResponse) -> HashMap> - + Clone - + Send - + Sync - + 'static, + T: Fn(&MembershipResponse) -> HashMap> + Clone + Send + Sync + 'static, { fn clone_box(&self) -> Box { Box::new(self.clone()) @@ -78,7 +74,7 @@ impl Fetch { pub(crate) async fn fetch_cluster( &self, state: impl Into, - ) -> Result<(ClusterStateReady, FetchMembershipResponse), CurpError> { + ) -> Result<(ClusterStateReady, MembershipResponse), CurpError> { let state = match state.into() { ClusterState::Init(state) => { let resp = self @@ -105,6 +101,21 @@ impl Fetch { Err(CurpError::internal("cluster not available")) } + // TODO: Separate the connect object into its own type + /// Returns a reference to the `ConnectToCluster` trait object. + pub(crate) fn connect_to(&self) -> &dyn ConnectToCluster { + self.connect_to.as_ref() + } + + /// Build `ClusterStateReady` from `MembershipResponse` + pub(crate) fn build_cluster_state_from_response( + connect_to: &dyn ConnectToCluster, + resp: MembershipResponse, + ) -> ClusterStateReady { + let connects = (connect_to)(&resp); + ClusterStateReady::new(resp.leader_id, resp.term, connects, resp.into_membership()) + } + /// Fetch the term of the cluster. This ensures that the current leader is the latest. 
fn fetch_term(&self, state: ClusterStateReady) -> impl Future { let timeout = self.timeout; @@ -124,7 +135,7 @@ impl Fetch { fn fetch_from_leader( &self, state: &ClusterStateReady, - ) -> impl Future> { + ) -> impl Future> { let timeout = self.timeout; let connect_to = self.connect_to.clone_box(); state.map_leader(|c| async move { @@ -139,7 +150,7 @@ impl Fetch { } /// Sends fetch membership request to the cluster, and returns the first response - async fn fetch_one(&self, state: &ClusterStateInit) -> Option { + async fn fetch_one(&self, state: &ClusterStateInit) -> Option { let timeout = self.timeout; let resps: Vec<_> = state .for_each_server(|c| async move { @@ -154,15 +165,6 @@ impl Fetch { .map(Response::into_inner) .max_by(|x, y| x.term.cmp(&y.term)) } - - /// Build `ClusterStateReady` from `FetchMembershipResponse` - fn build_cluster_state_from_response( - connect_to: &dyn ConnectToCluster, - resp: FetchMembershipResponse, - ) -> ClusterStateReady { - let connects = (connect_to)(&resp); - ClusterStateReady::new(resp.leader_id, resp.term, connects, resp.into_membership()) - } } impl std::fmt::Debug for Fetch { @@ -187,8 +189,7 @@ mod test { tests::init_mocked_connects, }, rpc::{ - self, connect::ConnectApi, CurpError, FetchMembershipResponse, Member, Node, - NodeMetadata, + self, connect::ConnectApi, CurpError, Member, MembershipResponse, Node, NodeMetadata, }, }; @@ -209,7 +210,7 @@ mod test { leader_id: Option, term: u64, members: impl IntoIterator, - ) -> Result, CurpError> { + ) -> Result, CurpError> { let leader_id = leader_id.ok_or(CurpError::leader_transfer("no current leader"))?; let members: Vec<_> = members.into_iter().collect(); @@ -223,7 +224,7 @@ mod test { .collect(); let qs = rpc::QuorumSet { set: members }; - let resp = FetchMembershipResponse { + let resp = MembershipResponse { members: vec![qs], nodes, term, diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 5b8e5f934..9dfeae19c 100644 --- 
a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -155,11 +155,10 @@ mod tests { use crate::{ member::Membership, rpc::{ - connect::ConnectApi, ChangeMembershipRequest, ChangeMembershipResponse, - FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, - FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, Node, NodeMetadata, - OpResponse, ProposeRequest, QuorumSet, ReadIndexResponse, RecordRequest, - RecordResponse, ShutdownRequest, ShutdownResponse, + connect::ConnectApi, ChangeMembershipRequest, FetchMembershipRequest, + FetchReadStateRequest, FetchReadStateResponse, MembershipResponse, MoveLeaderRequest, + MoveLeaderResponse, Node, NodeMetadata, OpResponse, ProposeRequest, QuorumSet, + ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, }, }; @@ -254,7 +253,7 @@ mod tests { &self, _request: FetchMembershipRequest, _timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { let ids = (0..self.size as u64); let qs = QuorumSet { set: ids.clone().collect(), @@ -262,7 +261,7 @@ mod tests { let nodes = ids .map(|node_id| Node::new(node_id, NodeMetadata::default())) .collect(); - let resp = FetchMembershipResponse { + let resp = MembershipResponse { term: self.term, leader_id: self.leader_id, members: vec![qs], @@ -276,7 +275,7 @@ mod tests { &self, _request: ChangeMembershipRequest, _timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { unreachable!("please use MockedConnectApi") } } diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index e22fe3e77..83222c93d 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -65,7 +65,7 @@ use crate::{ rpc::{ self, connect::{BypassedConnect, ConnectApi}, - FetchMembershipResponse, NodeMetadata, ProposeId, Protocol, + MembershipResponse, NodeMetadata, ProposeId, Protocol, }, server::StreamingProtocol, tracker::Tracker, @@ -239,7 +239,7 @@ impl 
ClientBuilder { ) -> impl ConnectToCluster { let tls_config = self.tls_config.clone(); let is_raw_curp = self.is_raw_curp; - move |resp: &FetchMembershipResponse| -> HashMap> { + move |resp: &MembershipResponse| -> HashMap> { resp.nodes .clone() .into_iter() diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 10ad19bf6..ca9f626b2 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -19,7 +19,7 @@ use super::{ }; use crate::{ members::ServerId, - rpc::{CurpError, ReadState, Redirect, ProposeId, FetchMembershipResponse, NodeMetadata, Node, Change}, tracker::Tracker, + rpc::{CurpError, ReadState, Redirect, ProposeId, MembershipResponse, NodeMetadata, Node, Change}, tracker::Tracker, }; /// Backoff config @@ -222,6 +222,11 @@ impl ClusterStateShared { ClusterState::Ready(ready) => Ok(ready), } } + + /// Updates the current state with the provided `ClusterStateReady`. + pub(crate) fn update_with(&self, cluster_state: ClusterStateReady) { + *self.inner.write() = ClusterState::Ready(cluster_state); + } } /// The retry client automatically retry the requests of the inner client api @@ -425,7 +430,7 @@ where async fn fetch_cluster( &self, linearizable: bool, - ) -> Result { + ) -> Result { self.retry::<_, _>(|client, ctx| async move { let (_, resp) = self.fetch.fetch_cluster(ClusterState::Ready(ctx.cluster_state())).await?; Ok(resp) @@ -436,10 +441,13 @@ where /// Performs membership change async fn change_membership(&self, changes: Vec) -> Result<(), Self::Error> { - self.retry::<_, _>(|client, ctx| client.change_membership(changes.clone(), ctx)) - .await - } + let resp = self.retry::<_, _>(|client, ctx| client.change_membership(changes.clone(), ctx)) + .await?; + let cluster_state = Fetch::build_cluster_state_from_response(self.fetch.connect_to(), resp); + self.cluster_state.update_with(cluster_state); + Ok(()) + } } /// Tests for backoff diff --git a/crates/curp/src/client/tests.rs 
b/crates/curp/src/client/tests.rs index 7f24a49d7..636c781b7 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -26,8 +26,8 @@ use crate::{ rpc::{ self, connect::{ConnectApi, MockConnectApi}, - CurpError, FetchMembershipResponse, Node, NodeMetadata, OpResponse, ProposeId, - ProposeResponse, ReadIndexResponse, RecordResponse, ResponseOp, SyncedResponse, + CurpError, MembershipResponse, Node, NodeMetadata, OpResponse, ProposeId, ProposeResponse, + ReadIndexResponse, RecordResponse, ResponseOp, SyncedResponse, }, }; @@ -103,7 +103,7 @@ fn build_membership_resp( leader_id: Option, term: u64, members: impl IntoIterator, -) -> Result, CurpError> { +) -> Result, CurpError> { let leader_id = leader_id.ok_or(CurpError::leader_transfer("no current leader"))?; let members: Vec<_> = members.into_iter().collect(); @@ -117,7 +117,7 @@ fn build_membership_resp( .collect(); let qs = rpc::QuorumSet { set: members }; - let resp = FetchMembershipResponse { + let resp = MembershipResponse { members: vec![qs], nodes, term, diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 5285237cd..728b2dff8 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -14,7 +14,7 @@ use super::{ }; use crate::rpc::{ Change, ChangeMembershipRequest, CurpError, FetchReadStateRequest, MembershipChange, - MoveLeaderRequest, ReadState, ShutdownRequest, + MembershipResponse, MoveLeaderRequest, ReadState, ShutdownRequest, }; /// The unary client @@ -113,18 +113,19 @@ impl RepeatableClientApi for Unary { &self, changes: Vec, ctx: Context, - ) -> Result<(), Self::Error> { + ) -> Result { let changes = changes .into_iter() .map(|c| MembershipChange { change: Some(c) }) .collect(); let req = ChangeMembershipRequest { changes }; let timeout = self.config.wait_synced_timeout(); - let _ignore = ctx + let resp = ctx .cluster_state() .map_leader(|conn| async move { conn.change_membership(req, timeout).await 
}) - .await?; + .await? + .into_inner(); - Ok(()) + Ok(resp) } } diff --git a/crates/curp/src/rpc/connect/lazy.rs b/crates/curp/src/rpc/connect/lazy.rs index 5ca26bd98..6e69a148c 100644 --- a/crates/curp/src/rpc/connect/lazy.rs +++ b/crates/curp/src/rpc/connect/lazy.rs @@ -14,11 +14,11 @@ use crate::{ commandpb::protocol_client::ProtocolClient, inner_messagepb::inner_protocol_client::InnerProtocolClient, }, - AppendEntriesRequest, AppendEntriesResponse, ChangeMembershipRequest, - ChangeMembershipResponse, CurpError, FetchMembershipRequest, FetchMembershipResponse, - FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotResponse, MoveLeaderRequest, - MoveLeaderResponse, OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, - RecordResponse, ShutdownRequest, ShutdownResponse, VoteRequest, VoteResponse, + AppendEntriesRequest, AppendEntriesResponse, ChangeMembershipRequest, CurpError, + FetchMembershipRequest, FetchReadStateRequest, FetchReadStateResponse, + InstallSnapshotResponse, MembershipResponse, MoveLeaderRequest, MoveLeaderResponse, + OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, RecordResponse, + ShutdownRequest, ShutdownResponse, VoteRequest, VoteResponse, }, snapshot::Snapshot, }; @@ -231,7 +231,7 @@ impl ConnectApi for ConnectLazy> { &self, request: FetchMembershipRequest, timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { let mut inner = self.inner.lock().await; self.connect_inner(&mut inner); inner @@ -245,7 +245,7 @@ impl ConnectApi for ConnectLazy> { &self, request: ChangeMembershipRequest, timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { let mut inner = self.inner.lock().await; self.connect_inner(&mut inner); inner diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index 48258ceed..5a5d54312 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -53,8 +53,8 @@ use crate::{ use super::{ 
proto::commandpb::{ReadIndexRequest, ReadIndexResponse}, reconnect::Reconnect, - ChangeMembershipRequest, ChangeMembershipResponse, FetchMembershipRequest, - FetchMembershipResponse, OpResponse, RecordRequest, RecordResponse, + ChangeMembershipRequest, FetchMembershipRequest, MembershipResponse, OpResponse, RecordRequest, + RecordResponse, }; /// Install snapshot chunk size: 64KB @@ -224,14 +224,14 @@ pub(crate) trait ConnectApi: Send + Sync + 'static { &self, request: FetchMembershipRequest, timeout: Duration, - ) -> Result, CurpError>; + ) -> Result, CurpError>; /// Changes the membership async fn change_membership( &self, request: ChangeMembershipRequest, timeout: Duration, - ) -> Result, CurpError>; + ) -> Result, CurpError>; } /// Inner Connect interface among different servers @@ -497,7 +497,7 @@ impl ConnectApi for Connect> { &self, request: FetchMembershipRequest, timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { let mut client = self.rpc_connect.clone(); let req = tonic::Request::new(request); with_timeout!(timeout, client.fetch_membership(req)).map_err(Into::into) @@ -507,7 +507,7 @@ impl ConnectApi for Connect> { &self, request: ChangeMembershipRequest, timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { let mut client = self.rpc_connect.clone(); let req = tonic::Request::new(request); with_timeout!(timeout, client.change_membership(req)).map_err(Into::into) @@ -772,7 +772,7 @@ where &self, request: FetchMembershipRequest, _timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { let mut req = tonic::Request::new(request); req.metadata_mut().inject_bypassed(); req.metadata_mut().inject_current(); @@ -783,7 +783,7 @@ where &self, request: ChangeMembershipRequest, _timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { let mut req = tonic::Request::new(request); req.metadata_mut().inject_bypassed(); req.metadata_mut().inject_current(); diff --git 
a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 85dae21af..93a45df91 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -28,14 +28,13 @@ pub use self::proto::{ protocol_client, protocol_server::{Protocol, ProtocolServer}, ChangeMembershipRequest, - ChangeMembershipResponse, CmdResult, FetchMembershipRequest, - FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, LeaseKeepAliveMsg, Member, + MembershipResponse, MoveLeaderRequest, MoveLeaderResponse, Node, @@ -790,7 +789,7 @@ impl std::fmt::Display for ProposeId { } } -impl FetchMembershipResponse { +impl MembershipResponse { /// Consumes self and returns a `Membership` pub(crate) fn into_membership(self) -> Membership { let Self { members, nodes, .. } = self; diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs index 2e689be46..01039b8f3 100644 --- a/crates/curp/src/rpc/reconnect.rs +++ b/crates/curp/src/rpc/reconnect.rs @@ -7,10 +7,10 @@ use futures::Stream; use crate::{ members::ServerId, rpc::{ - connect::ConnectApi, ChangeMembershipRequest, ChangeMembershipResponse, CurpError, - FetchMembershipRequest, FetchMembershipResponse, FetchReadStateRequest, - FetchReadStateResponse, MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeRequest, - ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, + connect::ConnectApi, ChangeMembershipRequest, CurpError, FetchMembershipRequest, + FetchReadStateRequest, FetchReadStateResponse, MembershipResponse, MoveLeaderRequest, + MoveLeaderResponse, OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, + RecordResponse, ShutdownRequest, ShutdownResponse, }, }; @@ -159,7 +159,7 @@ impl ConnectApi for Reconnect { &self, request: FetchMembershipRequest, timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { execute_with_reconnect!(self, ConnectApi::fetch_membership, request, timeout) } @@ -168,7 +168,7 @@ impl ConnectApi for Reconnect 
{ &self, request: ChangeMembershipRequest, timeout: Duration, - ) -> Result, CurpError> { + ) -> Result, CurpError> { execute_with_reconnect!(self, ConnectApi::change_membership, request, timeout) } } diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 5a3645493..8efbcc06f 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -18,13 +18,14 @@ use super::CurpNode; use crate::log_entry::EntryData; use crate::log_entry::LogEntry; use crate::member::Membership; +use crate::rpc; use crate::rpc::connect::InnerConnectApiWrapper; use crate::rpc::inner_connects; use crate::rpc::Change; use crate::rpc::ChangeMembershipRequest; -use crate::rpc::ChangeMembershipResponse; use crate::rpc::CurpError; use crate::rpc::MembershipChange; +use crate::rpc::MembershipResponse; use crate::rpc::ProposeId; use crate::rpc::Redirect; use crate::server::raw_curp::node_state::NodeState; @@ -35,7 +36,7 @@ impl, RC: RoleChange> CurpNode { pub(crate) async fn change_membership( &self, request: ChangeMembershipRequest, - ) -> Result { + ) -> Result { self.ensure_leader()?; let changes = request .changes @@ -58,7 +59,36 @@ impl, RC: RoleChange> CurpNode { self.wait_commit(Some(propose_id)).await; } - Ok(ChangeMembershipResponse {}) + self.build_membership_response() + } + + /// Builds a `ChangeMembershipResponse` from the given membership. 
+ pub(crate) fn build_membership_response(&self) -> Result { + let (leader_id, term, _) = self.curp.leader(); + let Membership { members, nodes } = self.curp.effective_membership(); + let members = members + .into_iter() + .map(|s| rpc::QuorumSet { + set: s.into_iter().collect(), + }) + .collect(); + let nodes = nodes + .into_iter() + .map(|(node_id, meta)| rpc::Node { + node_id, + meta: Some(meta), + }) + .collect(); + + let leader_id = + leader_id.ok_or(CurpError::LeaderTransfer("no current leader".to_owned()))?; + + Ok(MembershipResponse { + members, + nodes, + term, + leader_id, + }) } /// Wait the command with the propose id to be committed diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 76a2de593..a031b209e 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -37,16 +37,16 @@ use super::{ use crate::{ cmd::{Command, CommandExecutor}, log_entry::{EntryData, LogEntry}, - member::{Membership, MembershipInfo}, + member::MembershipInfo, response::ResponseSender, role_change::RoleChange, rpc::{ self, connect::{InnerConnectApi, InnerConnectApiWrapper}, AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchMembershipRequest, - FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, - InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, - MoveLeaderResponse, Node, PoolEntry, ProposeId, ProposeRequest, ProposeResponse, QuorumSet, + FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotRequest, + InstallSnapshotResponse, LeaseKeepAliveMsg, MembershipResponse, MoveLeaderRequest, + MoveLeaderResponse, PoolEntry, ProposeId, ProposeRequest, ProposeResponse, ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, SyncedResponse, TriggerShutdownRequest, TriggerShutdownResponse, TryBecomeLeaderNowRequest, TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, @@ -361,32 +361,8 @@ 
impl, RC: RoleChange> CurpNode { pub(super) fn fetch_membership( &self, _req: FetchMembershipRequest, - ) -> Result { - let (leader_id, term, _) = self.curp.leader(); - let Membership { members, nodes } = self.curp.effective_membership(); - let members = members - .into_iter() - .map(|s| QuorumSet { - set: s.into_iter().collect(), - }) - .collect(); - let nodes = nodes - .into_iter() - .map(|(node_id, meta)| Node { - node_id, - meta: Some(meta), - }) - .collect(); - - let leader_id = - leader_id.ok_or(CurpError::LeaderTransfer("no current leader".to_owned()))?; - - Ok(FetchMembershipResponse { - members, - nodes, - term, - leader_id, - }) + ) -> Result { + self.build_membership_response() } } diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 4b729111f..75d19ebe8 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -26,14 +26,13 @@ use crate::rpc::connect::Bypass; use crate::rpc::AppendEntriesRequest; use crate::rpc::AppendEntriesResponse; use crate::rpc::ChangeMembershipRequest; -use crate::rpc::ChangeMembershipResponse; use crate::rpc::FetchMembershipRequest; -use crate::rpc::FetchMembershipResponse; use crate::rpc::FetchReadStateRequest; use crate::rpc::FetchReadStateResponse; use crate::rpc::InstallSnapshotRequest; use crate::rpc::InstallSnapshotResponse; use crate::rpc::LeaseKeepAliveMsg; +use crate::rpc::MembershipResponse; use crate::rpc::MoveLeaderRequest; use crate::rpc::MoveLeaderResponse; use crate::rpc::OpResponse; @@ -186,7 +185,7 @@ impl, RC: RoleChange> crate::rpc::Protocol fo async fn fetch_membership( &self, request: tonic::Request, - ) -> Result, tonic::Status> { + ) -> Result, tonic::Status> { Ok(tonic::Response::new( self.inner.fetch_membership(request.into_inner())?, )) @@ -196,7 +195,7 @@ impl, RC: RoleChange> crate::rpc::Protocol fo async fn change_membership( &self, request: tonic::Request, - ) -> Result, tonic::Status> { + ) -> Result, tonic::Status> { self.inner 
.change_membership(request.into_inner()) .await diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index b36ed1155..3ef02ac99 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -56,7 +56,7 @@ pub mod commandpb { } pub use commandpb::{ - protocol_client::ProtocolClient, FetchMembershipResponse, ProposeRequest, ProposeResponse, + protocol_client::ProtocolClient, MembershipResponse, ProposeRequest, ProposeResponse, }; use self::commandpb::FetchMembershipRequest; @@ -434,7 +434,7 @@ impl CurpGroup { Err(e) => continue, }; - let FetchMembershipResponse { + let MembershipResponse { leader_id, term, .. } = if let Ok(resp) = client.fetch_membership(FetchMembershipRequest {}).await { resp.into_inner() @@ -475,7 +475,7 @@ impl CurpGroup { Err(e) => continue, }; - let FetchMembershipResponse { + let MembershipResponse { leader_id, term, .. } = if let Ok(resp) = client.fetch_membership(FetchMembershipRequest {}).await { resp.into_inner() diff --git a/crates/xline/src/server/auth_wrapper.rs b/crates/xline/src/server/auth_wrapper.rs index adaba1ef2..ea0c07833 100644 --- a/crates/xline/src/server/auth_wrapper.rs +++ b/crates/xline/src/server/auth_wrapper.rs @@ -3,11 +3,10 @@ use std::sync::Arc; use curp::{ cmd::PbCodec, rpc::{ - ChangeMembershipRequest, ChangeMembershipResponse, FetchMembershipRequest, - FetchMembershipResponse, FetchReadStateRequest, FetchReadStateResponse, LeaseKeepAliveMsg, - MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeRequest, Protocol, - ReadIndexRequest, ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, - ShutdownResponse, + ChangeMembershipRequest, FetchMembershipRequest, FetchReadStateRequest, + FetchReadStateResponse, LeaseKeepAliveMsg, MembershipResponse, MoveLeaderRequest, + MoveLeaderResponse, OpResponse, ProposeRequest, Protocol, ReadIndexRequest, + ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, 
ShutdownResponse, }, }; use flume::r#async::RecvStream; @@ -104,14 +103,14 @@ impl Protocol for AuthWrapper { async fn fetch_membership( &self, request: tonic::Request, - ) -> Result, tonic::Status> { + ) -> Result, tonic::Status> { self.curp_server.fetch_membership(request).await } async fn change_membership( &self, request: tonic::Request, - ) -> Result, tonic::Status> { + ) -> Result, tonic::Status> { self.curp_server.change_membership(request).await } } From acb5a07949d8747793d75e764609babdd569b19d Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 17:31:35 +0800 Subject: [PATCH 230/322] test: do not wait for node shutdown in membership change Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/tests/it/server.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index 068a0403c..7fe035bf0 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -692,9 +692,6 @@ async fn propose_conf_change_rpc_should_work_when_client_has_wrong_cluster() { .change_membership(vec![Change::Remove(node_id)]) .await .unwrap(); - group - .wait_for_node_shutdown(node_id, DEFAULT_SHUTDOWN_TIMEOUT) - .await; } #[tokio::test(flavor = "multi_thread")] From 49a830e6d6170d877d66cb6f0eebd15e8441b455 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 17:39:11 +0800 Subject: [PATCH 231/322] test: fix remove leader will invalidate client id Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/tests/it/server.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index 7fe035bf0..92a20ecdb 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -503,10 +503,12 @@ async fn propose_remove_leader_should_success() { .unwrap(); 
assert_cluster(&client, 3, 2, []).await; - client - .change_membership(vec![Change::Remove(id)]) - .await - .unwrap(); + while { + client + .change_membership(vec![Change::Remove(id)]) + .await + .is_err() + } {} assert_cluster(&client, 2, 2, []).await; let new_id = client.fetch_leader_id(true).await.unwrap(); @@ -704,7 +706,8 @@ async fn move_leader_should_move_leadership_to_target_node() { let old_leader = group.get_leader().await.0; let target = *group.nodes.keys().find(|&id| &old_leader != id).unwrap(); - client.move_leader(target).await.unwrap(); + // TODO: investigate why moving the leader invalidates the client id + while client.move_leader(target).await.is_err() {} let new_leader = group.get_leader().await.0; assert_eq!(target, new_leader); From 1aed29c7da4992e5c87a2008ea16fcaba344a86c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 18:08:56 +0800 Subject: [PATCH 232/322] fix: spawn sync follower task should exclude self Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index a031b209e..d7e8ba1ae 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -814,8 +814,12 @@ impl, RC: RoleChange> CurpNode { Self::election_task(Arc::clone(&curp), n) }); + let self_id = curp.id(); curp.with_member_connects(|connects| { - for c in connects.values() { + for (id, c) in connects { + if *id == self_id { + continue; + } let (sync_event, remove_event) = curp.events(c.id()); task_manager.spawn(TaskName::SyncFollower, |n| { Self::sync_follower_task( From 6855ed61ecead114dfe4f2f4c4431a0413dd9c2d Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 19:54:15 +0800 Subject: [PATCH 233/322] test: fix toml config Signed-off-by: 
bsbds <69835502+bsbds@users.noreply.github.com> --- crates/utils/src/config.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/crates/utils/src/config.rs b/crates/utils/src/config.rs index 8940aa3f6..57f0880d1 100644 --- a/crates/utils/src/config.rs +++ b/crates/utils/src/config.rs @@ -1621,12 +1621,28 @@ mod tests { peer_advertise_urls = ['127.0.0.1:2380'] client_listen_urls = ['127.0.0.1:2379'] client_advertise_urls = ['127.0.0.1:2379'] + node_id = 1 [cluster.peers] node1 = ['127.0.0.1:2379'] node2 = ['127.0.0.1:2380'] node3 = ['127.0.0.1:2381'] + [cluster.initial_membership_info.node1] + id = 1 + peer_urls = ['127.0.0.1:2380'] + client_urls = ['127.0.0.1:2379'] + + [cluster.initial_membership_info.node2] + id = 2 + peer_urls = ['127.0.0.1:2480'] + client_urls = ['127.0.0.1:2479'] + + [cluster.initial_membership_info.node3] + id = 3 + peer_urls = ['127.0.0.1:2580'] + client_urls = ['127.0.0.1:2579'] + [cluster.storage] [log] From 5ec40551352d4734e2e89a0324f380dec05b59e1 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 20:29:47 +0800 Subject: [PATCH 234/322] test: fix duplicate node id Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline-client/tests/it/member.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/xline-client/tests/it/member.rs b/crates/xline-client/tests/it/member.rs index b9e54c896..cffbc7c8f 100644 --- a/crates/xline-client/tests/it/member.rs +++ b/crates/xline-client/tests/it/member.rs @@ -9,8 +9,8 @@ async fn learner_add_and_remove_are_ok() -> Result<()> { let (_cluster, client) = get_cluster_client().await.unwrap(); let client = client.member_client(); - let node1 = Node::new(1, "n1", vec!["10.0.0.4:2380"], vec!["10.0.0.4.2379"]); - let node2 = Node::new(2, "n2", vec!["10.0.0.5:2380"], vec!["10.0.0.5.2379"]); + let node1 = Node::new(11, "n1", vec!["10.0.0.4:2380"], vec!["10.0.0.4.2379"]); + let node2 = Node::new(12, 
"n2", vec!["10.0.0.5:2380"], vec!["10.0.0.5.2379"]); client .add_learner(vec![node1, node2]) .await @@ -18,7 +18,7 @@ async fn learner_add_and_remove_are_ok() -> Result<()> { // Remove the previously added learners client - .remove_learner(vec![1, 2]) + .remove_learner(vec![11, 12]) .await .expect("failed to remove learners"); From 31a334293062cae63f7e3913741af67ad98143b1 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 21:13:00 +0800 Subject: [PATCH 235/322] fix: exclude self connection when broadcasting vote Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index d7e8ba1ae..b9f963160 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -8,7 +8,7 @@ use std::{ use clippy_utilities::{NumericCast, OverflowArithmetic}; use engine::{SnapshotAllocator, SnapshotApi}; use event_listener::Event; -use futures::{pin_mut, stream::FuturesUnordered, Stream, StreamExt}; +use futures::{pin_mut, stream::FuturesUnordered, FutureExt, Stream, StreamExt}; use madsim::rand::{thread_rng, Rng}; use opentelemetry::KeyValue; use parking_lot::{Mutex, RwLock}; @@ -848,27 +848,27 @@ impl, RC: RoleChange> CurpNode { /// - `Some(vote)` if bcast pre vote and success /// - `None` if bcast pre vote and fail or bcast vote async fn bcast_vote(curp: &RawCurp, vote: Vote) -> Option { + let self_id = curp.id(); if vote.is_pre_vote { - debug!("{} broadcasts pre votes to all servers", curp.id()); + debug!("{self_id} broadcasts pre votes to all servers"); } else { - debug!("{} broadcasts votes to all servers", curp.id()); + debug!("{self_id} broadcasts votes to all servers"); } let rpc_timeout = curp.cfg().rpc_timeout; let voters_connects = curp.voters_connects(); + let req = 
VoteRequest::new( + vote.term, + vote.candidate_id, + vote.last_log_index, + vote.last_log_term, + vote.is_pre_vote, + ); let resps = voters_connects .into_iter() - .map(|(id, connect)| { - let req = VoteRequest::new( - vote.term, - vote.candidate_id, - vote.last_log_index, - vote.last_log_term, - vote.is_pre_vote, - ); - async move { - let resp = connect.vote(req, rpc_timeout).await; - (id, resp) - } + .filter_map(|(id, connect)| { + (id != self_id).then_some(async move { + connect.vote(req, rpc_timeout).map(|res| (id, res)).await + }) }) .collect::>() .filter_map(|(id, resp)| async move { From 88950059d106133a6c8bd8e9fa01b0d7a1552f5a Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 21:22:59 +0800 Subject: [PATCH 236/322] fix: add a workaround when removing the leader in cluster server Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/src/server/cluster_server.rs | 24 ++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/crates/xline/src/server/cluster_server.rs b/crates/xline/src/server/cluster_server.rs index 19393994e..42fc135ff 100644 --- a/crates/xline/src/server/cluster_server.rs +++ b/crates/xline/src/server/cluster_server.rs @@ -100,13 +100,18 @@ impl Cluster for ClusterServer { let id = request.into_inner().id; // In etcd a member could be a learner, and could return CurpError::InvalidMemberChange // TODO: handle other errors that may returned - let _ignore = self - .client - .change_membership(vec![Change::Demote(id)]) - .await; self.client - .change_membership(vec![Change::Remove(id)]) + .change_membership(vec![Change::Demote(id)]) .await?; + while self + .client + .change_membership(vec![Change::Remove(id)]) + .await + // TODO: This is workaround for removed leader, we need retry to update the client id + // use a method to manually update it + .is_err_and(|e| e.code() == tonic::Code::FailedPrecondition) + {} + let members = 
self.fetch_members(true).await?; Ok(tonic::Response::new(MemberRemoveResponse { @@ -133,9 +138,14 @@ impl Cluster for ClusterServer { .change_membership(vec![Change::Demote(id)]) .await?; } - self.client + while self + .client .change_membership(vec![Change::Remove(id)]) - .await?; + .await + // TODO: This is workaround for removed leader, we need retry to update the client id + // use a method to manually update it + .is_err_and(|e| e.code() == tonic::Code::FailedPrecondition) + {} let meta = NodeMetadata::new( member.name.clone(), From 5050be4f1c8c1c851e880ddc5dc92e0ca4bcc711 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 21:25:55 +0800 Subject: [PATCH 237/322] fix: use client urls in lease server redirection Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/src/server/lease_server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/xline/src/server/lease_server.rs b/crates/xline/src/server/lease_server.rs index 2bfb815ea..9f86594b2 100644 --- a/crates/xline/src/server/lease_server.rs +++ b/crates/xline/src/server/lease_server.rs @@ -314,7 +314,7 @@ impl Lease for LeaseServer { return Err(tonic::Status::internal("Leader not exist")); }; break self - .follower_keep_alive(request_stream, leader_meta.peer_urls()) + .follower_keep_alive(request_stream, leader_meta.client_urls()) .await?; } }; From f900032ceeac31984ea162731e348e839a6035e8 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 8 Oct 2024 21:38:39 +0800 Subject: [PATCH 238/322] test: fix test_status Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/tests/it/maintenance_test.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/xline/tests/it/maintenance_test.rs b/crates/xline/tests/it/maintenance_test.rs index 77cde73cd..1d17559f2 100644 --- a/crates/xline/tests/it/maintenance_test.rs +++ 
b/crates/xline/tests/it/maintenance_test.rs @@ -104,12 +104,16 @@ async fn test_status() -> Result<(), Box> { let mut cluster = Cluster::new_rocks(3).await; cluster.start().await; let client = cluster.client().await; + // adds some data to the db + let _ignore = client.kv_client().put("foo", "bar", None).await?; + // ensure that the key is persistent + let _ignore = client.kv_client().put("foo", "bar1", None).await?; let mut maintenance_client = client.maintenance_client(); let res = maintenance_client.status().await?; assert_eq!(res.version, env!("CARGO_PKG_VERSION")); assert!(res.db_size > 0); assert!(res.db_size_in_use > 0); - assert_ne!(res.leader, 0); + assert_eq!(res.leader, 0); assert!(res.raft_index >= res.raft_applied_index); assert_eq!(res.raft_term, 1); assert!(res.raft_applied_index > 0); From 74c5905e70423be353cd80756db661e947bd29af Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 10 Oct 2024 10:41:05 +0800 Subject: [PATCH 239/322] test: add tests for Joint Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/quorum.rs | 60 ++++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/crates/curp/src/quorum.rs b/crates/curp/src/quorum.rs index c3ed2bc58..4f985af5e 100644 --- a/crates/curp/src/quorum.rs +++ b/crates/curp/src/quorum.rs @@ -30,19 +30,13 @@ where { /// Generates a new coherent joint quorum set pub(crate) fn coherent(&self, other: Self) -> Self { - if self.is_superset(&other) { + if other.sets.iter().any(|set| self.sets.contains(set)) { return other; } - // TODO: select the config where the leader is in let last = self.sets.last().cloned(); Self::new(last.into_iter().chain(other.sets).collect()) } - - /// Checks if `self` is a superset of `other` - fn is_superset(&self, other: &Self) -> bool { - other.sets.iter().all(|s| self.sets.contains(s)) - } } /// A quorum set @@ -119,3 +113,55 @@ where self.sets.iter().all(|s| 
s.is_recover_quorum(ids.clone())) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, PartialEq, Clone)] + struct MockQuorumSet; + + fn assert_coherent(from: &[BTreeSet], to: &[BTreeSet], expect: &[BTreeSet]) { + let joint_from = Joint::new(from.to_vec()); + let joint_to = Joint::new(to.to_vec()); + let joint_coherent = joint_from.coherent(joint_to); + assert_eq!( + joint_coherent.sets, expect, + "from: {from:?}, to: {to:?}, expect: {expect:?}" + ); + } + + #[test] + fn test_joint_coherent() { + assert_coherent( + &[BTreeSet::from([1, 2, 3])], + &[BTreeSet::from([1, 2, 3])], + &[BTreeSet::from([1, 2, 3])], + ); + assert_coherent( + &[BTreeSet::from([1, 2, 3])], + &[BTreeSet::from([1, 2, 3, 4])], + &[BTreeSet::from([1, 2, 3]), BTreeSet::from([1, 2, 3, 4])], + ); + assert_coherent( + &[BTreeSet::from([1, 2, 3])], + &[BTreeSet::from([4, 5])], + &[BTreeSet::from([1, 2, 3]), BTreeSet::from([4, 5])], + ); + assert_coherent( + &[BTreeSet::from([1, 2, 3]), BTreeSet::from([1, 2, 3, 4])], + &[BTreeSet::from([1, 2, 3, 4])], + &[BTreeSet::from([1, 2, 3, 4])], + ); + assert_coherent( + &[BTreeSet::from([1, 2, 3]), BTreeSet::from([4, 5])], + &[BTreeSet::from([4, 5])], + &[BTreeSet::from([4, 5])], + ); + assert_coherent( + &[BTreeSet::from([4, 5])], + &[BTreeSet::from([1, 2, 3]), BTreeSet::from([4, 5])], + &[BTreeSet::from([1, 2, 3]), BTreeSet::from([4, 5])], + ); + } +} From 24209e13501b4dca25cae387a76e1d5dfbc335c6 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:49:07 +0800 Subject: [PATCH 240/322] test: add tests for QuorumSet Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/quorum.rs | 56 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/crates/curp/src/quorum.rs b/crates/curp/src/quorum.rs index 4f985af5e..a439286fc 100644 --- a/crates/curp/src/quorum.rs +++ b/crates/curp/src/quorum.rs @@ -69,12 +69,12 @@ where fn 
is_super_quorum(&self, ids: I) -> bool { let num = ids.into_iter().filter(|id| self.contains(id)).count(); - num * 4 > 3 * self.len() + num * 4 >= 3 * self.len() } fn is_recover_quorum(&self, ids: I) -> bool { let num = ids.into_iter().filter(|id| self.contains(id)).count(); - num * 4 - 2 > self.len() + num * 4 > self.len() + 2 } } @@ -164,4 +164,56 @@ mod tests { &[BTreeSet::from([1, 2, 3]), BTreeSet::from([4, 5])], ); } + + fn power_set(set: &BTreeSet) -> Vec> { + (0..(1 << set.len())) + .map(|i| { + set.iter() + .enumerate() + .filter_map(|(j, x)| ((i >> j) & 1 == 1).then_some(*x)) + .collect() + }) + .collect() + } + + #[test] + fn test_quorum_should_work() { + let nodes = vec![1, 2, 3, 4, 5, 6, 7]; + // (quorum, recover_quorum, super_quorum) + let expected_res = vec![ + (1, 1, 1), + (2, 2, 2), + (2, 2, 3), + (3, 2, 3), + (3, 2, 4), + (4, 3, 5), + (4, 3, 6), + ]; + + for (node_cnt, (quorum, recover_quorum, super_quorum)) in + nodes.into_iter().zip(expected_res.into_iter()) + { + let set: BTreeSet = (0..node_cnt).collect(); + for sub in power_set(&set) { + let is_quorum = set.is_quorum(sub.clone()); + let is_recover_quorum = set.is_recover_quorum(sub.clone()); + let is_super_quorum = set.is_super_quorum(sub.clone()); + assert_eq!(sub.len() >= quorum, is_quorum); + assert_eq!( + sub.len() >= recover_quorum, + is_recover_quorum, + "size: {}, sub: {}", + set.len(), + sub.len() + ); + assert_eq!( + sub.len() >= super_quorum, + is_super_quorum, + "size: {}, sub: {}", + set.len(), + sub.len() + ); + } + } + } } From 4f18ba16c4436a1d8760f94201438e338227c5d2 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 10 Oct 2024 20:34:33 +0800 Subject: [PATCH 241/322] test: add tests for membership Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 193 ++++++++++++++++-- .../curp/src/server/raw_curp/member_impl.rs | 3 +- crates/curp/src/server/raw_curp/mod.rs | 3 +- 3 files changed, 181 
insertions(+), 18 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 1b295fbf6..761062f07 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -155,29 +155,19 @@ impl MembershipState { /// Returns the committed membership #[cfg(test)] - pub(crate) fn committed(&self, commit_index: LogIndex) -> &Membership { - &self - .entries - .iter() - .take_while(|entry| entry.index <= commit_index) - .last() - .unwrap() - .membership + pub(crate) fn committed(&self) -> &Membership { + &self.entries.first().unwrap().membership } /// Generates a new membership from `Change` /// /// Returns an empty `Vec` if there's an on-going membership change - pub(crate) fn changes( - &self, - changes: Changes, - commit_index: LogIndex, - ) -> Vec + pub(crate) fn changes(&self, changes: Changes) -> Vec where Changes: IntoIterator, { // membership uncommitted, return an empty vec - if self.last().index > commit_index { + if self.entries.len() != 1 { return vec![]; } self.last().membership.changes(changes) @@ -354,3 +344,178 @@ impl ClusterId for MembershipInfo { self.clone().into_membership().cluster_id() } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::rpc::{Node, NodeMetadata}; + + #[test] + fn test_membership_info_into_membership_ok() { + let init_members = BTreeMap::from([(1, NodeMetadata::default())]); + let membership_info = MembershipInfo::new(1, init_members.clone()); + let membership = Membership::new( + vec![BTreeSet::from([1])], + BTreeMap::from([(1, NodeMetadata::default())]), + ); + assert_eq!(membership_info.into_membership(), membership); + } + + fn build_membership(member_sets: impl IntoIterator>) -> Membership { + let members: Vec> = member_sets + .into_iter() + .map(|s| s.into_iter().collect()) + .collect(); + let nodes: BTreeMap = members + .iter() + .flat_map(|s| s.iter().map(|id| (*id, NodeMetadata::default()))) + .collect(); + Membership::new(members, nodes) + } + + fn build_membership_with_learners( + 
member_sets: impl IntoIterator>, + learners: impl IntoIterator, + ) -> Membership { + let members: Vec> = member_sets + .into_iter() + .map(|s| s.into_iter().collect()) + .collect(); + let nodes: BTreeMap = members + .iter() + .flat_map(|s| s.iter().copied()) + .chain(learners.into_iter()) + .map(|id| (id, NodeMetadata::default())) + .collect(); + Membership::new(members, nodes) + } + + #[test] + fn test_membership_state_append_will_update_effective() { + let m0 = build_membership([vec![1]]); + let mut membership_state = MembershipState::new(m0.clone()); + assert_eq!(*membership_state.effective(), m0); + + let m1 = build_membership([vec![1], vec![1, 2]]); + membership_state.append(1, m1.clone()); + assert_eq!(*membership_state.effective(), m1); + + let m2 = build_membership([vec![1, 2]]); + membership_state.append(2, m2.clone()); + assert_eq!(*membership_state.effective(), m2); + } + + #[test] + fn test_membership_state_commit_will_update_committed() { + let m0 = build_membership([vec![1]]); + let mut membership_state = MembershipState::new(m0.clone()); + assert_eq!(*membership_state.committed(), m0); + + let m1 = build_membership([vec![1], vec![1, 2]]); + membership_state.append(1, m1.clone()); + assert_eq!(*membership_state.effective(), m1); + assert_eq!(*membership_state.committed(), m0); + + membership_state.commit(1); + assert_eq!(*membership_state.effective(), m1); + assert_eq!(*membership_state.committed(), m1); + + let m2 = build_membership([vec![1, 2]]); + membership_state.append(2, m2.clone()); + let m3 = build_membership([vec![1, 2], vec![1, 2, 3]]); + membership_state.append(3, m3.clone()); + let m4 = build_membership([vec![1, 2, 3]]); + membership_state.append(4, m4.clone()); + + assert_eq!(*membership_state.effective(), m4); + + membership_state.commit(2); + assert_eq!(*membership_state.committed(), m2); + membership_state.commit(4); + assert_eq!(*membership_state.committed(), m4); + } + + #[test] + fn test_membership_state_truncate_ok() { + let m0 = 
build_membership([vec![1]]); + let mut membership_state = MembershipState::new(m0.clone()); + assert_eq!(*membership_state.committed(), m0); + + let m1 = build_membership([vec![1], vec![1, 2]]); + membership_state.append(1, m1.clone()); + let m2 = build_membership([vec![1, 2]]); + membership_state.append(2, m2.clone()); + let m3 = build_membership([vec![1, 2], vec![1, 2, 3]]); + membership_state.append(3, m3.clone()); + let m4 = build_membership([vec![1, 2, 3]]); + membership_state.append(4, m4.clone()); + + assert_eq!(*membership_state.effective(), m4); + + membership_state.commit(2); + membership_state.truncate(3); + + assert_eq!(*membership_state.committed(), m2); + assert_eq!(*membership_state.effective(), m3); + } + + #[test] + fn test_membership_changes_ok() { + let mut index = 1; + let mut membership_state = MembershipState::new(build_membership([vec![1]])); + let mut apply_changes = |state: &mut MembershipState, changes: Vec| { + for change in changes { + state.append(index, change); + state.commit(index); + index += 1; + } + }; + + let changes = + membership_state.changes([Change::Add(Node::new(2, NodeMetadata::default()))]); + assert_eq!( + changes, + vec![build_membership_with_learners([vec![1]], [2])] + ); + apply_changes(&mut membership_state, changes.clone()); + + let changes = membership_state.changes([Change::Promote(2)]); + assert_eq!( + changes, + vec![ + build_membership([vec![1], vec![1, 2]]), + build_membership([vec![1, 2]]) + ] + ); + apply_changes(&mut membership_state, changes.clone()); + + let changes = membership_state.changes([Change::Demote(2)]); + assert_eq!( + changes, + vec![ + build_membership([vec![1, 2], vec![1]]), + build_membership_with_learners([vec![1]], [2]) + ] + ); + apply_changes(&mut membership_state, changes.clone()); + + let changes = membership_state.changes([Change::Remove(2)]); + assert_eq!(changes, vec![build_membership([vec![1]])]); + apply_changes(&mut membership_state, changes.clone()); + } + + #[test] + fn 
test_membership_changes_reject_uncommitted() { + let mut index = 1; + let mut membership_state = MembershipState::new(build_membership([vec![1]])); + let changes = + membership_state.changes([Change::Add(Node::new(2, NodeMetadata::default()))]); + for change in changes { + // append but not committed + membership_state.append(index, change); + index += 1; + } + + assert!(membership_state.changes([Change::Promote(2)]).is_empty()); + } +} diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 2e9744a1e..0e8881b3d 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -27,8 +27,7 @@ impl RawCurp { where Changes: IntoIterator, { - self.log - .map_read(|log| self.ms.read().cluster().changes(changes, log.commit_index)) + self.ms.read().cluster().changes(changes) } /// Updates the role if the node is leader diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 97fa73129..e06eeff88 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1213,9 +1213,8 @@ impl RawCurp { /// Get the committed membership #[cfg(test)] pub(super) fn committed_membership(&self) -> Membership { - let log_r = self.log.read(); let ms_r = self.ms.read(); - ms_r.cluster().committed(log_r.commit_index).clone() + ms_r.cluster().committed().clone() } /// Get `append_entries` request for `follower_id` that contains the latest From b6cb60a2224a4b715013919f21f3d07394371716 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 10 Oct 2024 22:55:24 +0800 Subject: [PATCH 242/322] test: add tests for fetch Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/fetch.rs | 187 +++++++++++++++++++++++++++++--- crates/curp/src/client/tests.rs | 5 +- 2 files changed, 172 insertions(+), 20 deletions(-) diff --git 
a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 45e56cf1f..99a0502a5 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -75,27 +75,25 @@ impl Fetch { &self, state: impl Into, ) -> Result<(ClusterStateReady, MembershipResponse), CurpError> { - let state = match state.into() { - ClusterState::Init(state) => { - let resp = self - .fetch_one(&state) - .await - .ok_or(CurpError::internal("cluster not available"))?; - Self::build_cluster_state_from_response(self.connect_to.as_ref(), resp.clone()) - } - ClusterState::Ready(state) => state, - }; - - let (fetch_leader, term_ok) = - tokio::join!(self.fetch_from_leader(&state), self.fetch_term(state)); + let resp = self + .fetch_one(&state.into()) + .await + .ok_or(CurpError::internal("cluster not available"))?; + let new_state = + Self::build_cluster_state_from_response(self.connect_to.as_ref(), resp.clone()); + + let (fetch_leader, term_ok) = tokio::join!( + self.fetch_from_leader(&new_state), + self.fetch_term(new_state) + ); if term_ok { return fetch_leader; } - let (leader_state, resp) = fetch_leader?; + let (leader_state, leader_resp) = fetch_leader?; if self.fetch_term(leader_state.clone()).await { - return Ok((leader_state, resp)); + return Ok((leader_state, leader_resp)); } Err(CurpError::internal("cluster not available")) @@ -150,7 +148,7 @@ impl Fetch { } /// Sends fetch membership request to the cluster, and returns the first response - async fn fetch_one(&self, state: &ClusterStateInit) -> Option { + async fn fetch_one(&self, state: &impl ForEachServer) -> Option { let timeout = self.timeout; let resps: Vec<_> = state .for_each_server(|c| async move { @@ -177,17 +175,22 @@ impl std::fmt::Debug for Fetch { #[cfg(test)] mod test { - use std::{collections::HashMap, sync::Arc, time::Duration}; + use std::{ + collections::{BTreeSet, HashMap}, + sync::Arc, + time::Duration, + }; use futures::stream::FuturesUnordered; use tracing_test::traced_test; use 
crate::{ client::{ - cluster_state::{ClusterState, ClusterStateInit, ForEachServer}, + cluster_state::{ClusterState, ClusterStateInit, ClusterStateReady, ForEachServer}, config::Config, tests::init_mocked_connects, }, + member::Membership, rpc::{ self, connect::ConnectApi, CurpError, Member, MembershipResponse, Node, NodeMetadata, }, @@ -305,4 +308,152 @@ mod test { // only server(0, 1)'s responses are valid, less than majority quorum(3). fetch.fetch_cluster(connects).await.unwrap_err(); } + + #[traced_test] + #[tokio::test] + async fn test_unary_fetch_clusters_during_membership_change() { + let connects = init_mocked_connects(5, |id, conn| { + match id { + 0 | 1 => { + conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(Some(0), 1, vec![0, 1, 2, 3, 4]) + }); + } + 2 | 3 | 4 => { + conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(Some(0), 1, vec![0, 1, 2, 3]) + }); + } + _ => unreachable!("there are only 5 nodes"), + }; + }); + let fetch = init_fetch(connects.clone()); + let (_, res) = fetch.fetch_cluster(connects).await.unwrap(); + assert_eq!(res.members[0].set, vec![0, 1, 2, 3, 4]); + } + + #[traced_test] + #[tokio::test] + async fn test_unary_fetch_clusters_with_full_state_case0() { + // No network partition + let connects = init_mocked_connects(5, |id, conn| { + match id { + 0 | 1 | 2 | 3 | 4 => { + conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(Some(0), 1, vec![0, 1, 2, 3, 4]) + }); + } + _ => unreachable!("there are only 5 nodes"), + }; + }); + + let fetch = init_fetch(connects.clone()); + // Client cluster state outdated [0, 1, 2, 3] + let membership = Membership::new( + vec![(0..4).collect()], + (0..4).map(|i| (i, NodeMetadata::default())).collect(), + ); + let cluster_state = ClusterStateReady::new(0, 1, connects, membership); + let (_, res) = fetch.fetch_cluster(cluster_state).await.unwrap(); + assert_eq!(res.members[0].set, vec![0, 1, 2, 3, 4]); + 
assert_eq!(res.leader_id, 0); + assert_eq!(res.term, 1); + } + + #[traced_test] + #[tokio::test] + async fn test_unary_fetch_clusters_with_full_state_case1() { + /// Partitioned + let connects = init_mocked_connects(5, |id, conn| { + match id { + 2 | 3 | 4 => { + conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(Some(2), 2, vec![0, 1, 2, 3, 4]) + }); + } + 0 | 1 => { + conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(Some(0), 1, vec![0, 1, 2, 3, 4]) + }); + } + + _ => unreachable!("there are only 5 nodes"), + }; + }); + + let fetch = init_fetch(connects.clone()); + // Client cluster state outdated [0, 1, 2, 3] + let membership = Membership::new( + vec![(0..4).collect()], + (0..4).map(|i| (i, NodeMetadata::default())).collect(), + ); + let cluster_state = ClusterStateReady::new(0, 1, connects, membership); + let (_, res) = fetch.fetch_cluster(cluster_state).await.unwrap(); + assert_eq!(res.members[0].set, vec![0, 1, 2, 3, 4]); + assert_eq!(res.leader_id, 2); + assert_eq!(res.term, 2); + } + + #[traced_test] + #[tokio::test] + async fn test_unary_fetch_clusters_with_full_state_case2() { + /// Partitioned, the partitioned part has outdated membership state + let connects = init_mocked_connects(5, |id, conn| { + match id { + 2 | 3 | 4 => { + conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(Some(2), 2, vec![0, 1, 2, 3, 4]) + }); + } + 0 | 1 => { + conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(Some(0), 1, vec![0, 1, 2, 3]) + }); + } + + _ => unreachable!("there are only 5 nodes"), + }; + }); + + let fetch = init_fetch(connects.clone()); + let membership = Membership::new( + vec![(0..4).collect()], + (0..4).map(|i| (i, NodeMetadata::default())).collect(), + ); + let cluster_state = ClusterStateReady::new(0, 1, connects, membership); + let (_, res) = fetch.fetch_cluster(cluster_state).await.unwrap(); + assert_eq!(res.members[0].set, 
vec![0, 1, 2, 3, 4]); + assert_eq!(res.leader_id, 2); + assert_eq!(res.term, 2); + } + + #[traced_test] + #[tokio::test] + async fn test_unary_fetch_clusters_with_full_state_case3() { + /// Partitioned, no majority + let connects = init_mocked_connects(5, |id, conn| { + match id { + 0 | 1 => { + conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(Some(0), 1, vec![0, 1, 2, 3, 4]) + }); + } + 2 | 3 | 4 => { + conn.expect_fetch_membership().returning(|_req, _timeout| { + build_membership_resp(None, 1, vec![0, 1, 2, 3, 4]) + }); + } + _ => unreachable!("there are only 5 nodes"), + }; + }); + + let fetch = init_fetch(connects.clone()); + // Client cluster state outdated [0, 1, 2, 3, 4] + let membership = Membership::new( + vec![(0..5).collect()], + (0..5).map(|i| (i, NodeMetadata::default())).collect(), + ); + let cluster_state = ClusterStateReady::new(0, 1, connects, membership); + fetch.fetch_cluster(cluster_state).await.unwrap_err(); + } } diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 636c781b7..f73653c1b 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -361,12 +361,13 @@ async fn test_retry_propose_return_retry_error() { .returning(move |_req, _timeout| Err(err.clone())); }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); + let cluster_state = + ClusterStateReady::new(0, 1, connects.clone(), build_default_membership()); let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(10), 5), KeepAlive::new(Duration::from_secs(1)), - Fetch::new_disable(), + Fetch::new(Duration::from_secs(1), move |_| connects.clone()), ClusterState::Ready(cluster_state), ); let _err = retry From 629c5129d53d12f3442f488f6f6cfbb24c0c0aa3 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 11 Oct 2024 09:49:32 +0800 Subject: [PATCH 243/322] chore: 
rename ClusterStateReady to ClusterStateFull Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 18 +++++++++--------- crates/curp/src/client/fetch.rs | 24 ++++++++++++------------ crates/curp/src/client/keep_alive.rs | 6 +++--- crates/curp/src/client/mod.rs | 4 ++-- crates/curp/src/client/retry.rs | 24 ++++++++++++------------ crates/curp/src/client/tests.rs | 22 +++++++++++----------- 6 files changed, 49 insertions(+), 49 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 973b719cb..18ce95e84 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -28,7 +28,7 @@ pub(crate) enum ClusterState { /// Initial cluster state Init(ClusterStateInit), /// Ready cluster state - Ready(ClusterStateReady), + Full(ClusterStateFull), } impl From for ClusterState { @@ -37,9 +37,9 @@ impl From for ClusterState { } } -impl From for ClusterState { - fn from(ready: ClusterStateReady) -> Self { - ClusterState::Ready(ready) +impl From for ClusterState { + fn from(ready: ClusterStateFull) -> Self { + ClusterState::Full(ready) } } @@ -50,7 +50,7 @@ impl ForEachServer for ClusterState { ) -> FuturesUnordered { match *self { ClusterState::Init(ref init) => init.for_each_server(f), - ClusterState::Ready(ref ready) => ready.for_each_server(f), + ClusterState::Full(ref ready) => ready.for_each_server(f), } } } @@ -90,7 +90,7 @@ impl std::fmt::Debug for ClusterStateInit { /// The cluster state that is ready for client propose #[derive(Clone, Default)] -pub(crate) struct ClusterStateReady { +pub(crate) struct ClusterStateFull { /// The membership state membership: Membership, /// Leader id. 
@@ -101,7 +101,7 @@ pub(crate) struct ClusterStateReady { connects: HashMap>, } -impl std::fmt::Debug for ClusterStateReady { +impl std::fmt::Debug for ClusterStateFull { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("State") .field("leader", &self.leader) @@ -111,7 +111,7 @@ impl std::fmt::Debug for ClusterStateReady { } } -impl ForEachServer for ClusterStateReady { +impl ForEachServer for ClusterStateFull { fn for_each_server>( &self, f: impl FnMut(Arc) -> F, @@ -120,7 +120,7 @@ impl ForEachServer for ClusterStateReady { } } -impl ClusterStateReady { +impl ClusterStateFull { /// Creates a new `ClusterState` pub(crate) fn new( leader: ServerId, diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 99a0502a5..9636b7f56 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -12,7 +12,7 @@ use crate::{ rpc::{self, connect::ConnectApi, CurpError, FetchMembershipRequest, MembershipResponse}, }; -use super::cluster_state::{ClusterState, ClusterStateInit, ClusterStateReady, ForEachServer}; +use super::cluster_state::{ClusterState, ClusterStateFull, ClusterStateInit, ForEachServer}; use super::config::Config; /// Connect to cluster @@ -74,7 +74,7 @@ impl Fetch { pub(crate) async fn fetch_cluster( &self, state: impl Into, - ) -> Result<(ClusterStateReady, MembershipResponse), CurpError> { + ) -> Result<(ClusterStateFull, MembershipResponse), CurpError> { let resp = self .fetch_one(&state.into()) .await @@ -109,13 +109,13 @@ impl Fetch { pub(crate) fn build_cluster_state_from_response( connect_to: &dyn ConnectToCluster, resp: MembershipResponse, - ) -> ClusterStateReady { + ) -> ClusterStateFull { let connects = (connect_to)(&resp); - ClusterStateReady::new(resp.leader_id, resp.term, connects, resp.into_membership()) + ClusterStateFull::new(resp.leader_id, resp.term, connects, resp.into_membership()) } /// Fetch the term of the cluster. 
This ensures that the current leader is the latest. - fn fetch_term(&self, state: ClusterStateReady) -> impl Future { + fn fetch_term(&self, state: ClusterStateFull) -> impl Future { let timeout = self.timeout; let term = state.term(); let fetch_membership = move |c: Arc| async move { @@ -132,8 +132,8 @@ impl Fetch { /// Fetch cluster state from leader fn fetch_from_leader( &self, - state: &ClusterStateReady, - ) -> impl Future> { + state: &ClusterStateFull, + ) -> impl Future> { let timeout = self.timeout; let connect_to = self.connect_to.clone_box(); state.map_leader(|c| async move { @@ -186,7 +186,7 @@ mod test { use crate::{ client::{ - cluster_state::{ClusterState, ClusterStateInit, ClusterStateReady, ForEachServer}, + cluster_state::{ClusterState, ClusterStateFull, ClusterStateInit, ForEachServer}, config::Config, tests::init_mocked_connects, }, @@ -353,7 +353,7 @@ mod test { vec![(0..4).collect()], (0..4).map(|i| (i, NodeMetadata::default())).collect(), ); - let cluster_state = ClusterStateReady::new(0, 1, connects, membership); + let cluster_state = ClusterStateFull::new(0, 1, connects, membership); let (_, res) = fetch.fetch_cluster(cluster_state).await.unwrap(); assert_eq!(res.members[0].set, vec![0, 1, 2, 3, 4]); assert_eq!(res.leader_id, 0); @@ -387,7 +387,7 @@ mod test { vec![(0..4).collect()], (0..4).map(|i| (i, NodeMetadata::default())).collect(), ); - let cluster_state = ClusterStateReady::new(0, 1, connects, membership); + let cluster_state = ClusterStateFull::new(0, 1, connects, membership); let (_, res) = fetch.fetch_cluster(cluster_state).await.unwrap(); assert_eq!(res.members[0].set, vec![0, 1, 2, 3, 4]); assert_eq!(res.leader_id, 2); @@ -420,7 +420,7 @@ mod test { vec![(0..4).collect()], (0..4).map(|i| (i, NodeMetadata::default())).collect(), ); - let cluster_state = ClusterStateReady::new(0, 1, connects, membership); + let cluster_state = ClusterStateFull::new(0, 1, connects, membership); let (_, res) = 
fetch.fetch_cluster(cluster_state).await.unwrap(); assert_eq!(res.members[0].set, vec![0, 1, 2, 3, 4]); assert_eq!(res.leader_id, 2); @@ -453,7 +453,7 @@ mod test { vec![(0..5).collect()], (0..5).map(|i| (i, NodeMetadata::default())).collect(), ); - let cluster_state = ClusterStateReady::new(0, 1, connects, membership); + let cluster_state = ClusterStateFull::new(0, 1, connects, membership); fetch.fetch_cluster(cluster_state).await.unwrap_err(); } } diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 9dfeae19c..1e35085f1 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -16,7 +16,7 @@ use tokio::{sync::broadcast, task::JoinHandle}; use tracing::{debug, info, warn}; use super::{ - cluster_state::{ClusterState, ClusterStateReady}, + cluster_state::{ClusterState, ClusterStateFull}, fetch::Fetch, retry::ClusterStateShared, }; @@ -131,7 +131,7 @@ impl KeepAlive { pub(crate) async fn keep_alive_with( &self, client_id: u64, - cluster_state: ClusterStateReady, + cluster_state: ClusterStateFull, ) -> Result { cluster_state .map_leader(|conn| async move { @@ -331,7 +331,7 @@ mod tests { ) }) .collect(); - let state = ClusterState::Ready(ClusterStateReady::new( + let state = ClusterState::Full(ClusterStateFull::new( leader, term, connects.clone(), diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 83222c93d..83dd877e2 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -292,9 +292,9 @@ impl ClientBuilder { .collect(); let membership = Membership::new(vec![member_ids], metas); let cluster_state = - cluster_state::ClusterStateReady::new(leader_id, term, connects, membership); + cluster_state::ClusterStateFull::new(leader_id, term, connects, membership); - ClusterState::Ready(cluster_state) + ClusterState::Full(cluster_state) } } } diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 
ca9f626b2..7f804782b 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -10,7 +10,7 @@ use parking_lot::RwLock; use tracing::{debug, warn}; use super::{ - cluster_state::{ClusterState, ClusterStateInit, ClusterStateReady}, + cluster_state::{ClusterState, ClusterStateInit, ClusterStateFull}, config::Config, connect::{ProposeResponse, RepeatableClientApi}, fetch::Fetch, @@ -114,7 +114,7 @@ pub(crate) struct Context { /// First incomplete sequence first_incomplete: u64, /// The current cluster state - cluster_state: ClusterStateReady, + cluster_state: ClusterStateFull, } impl Context { @@ -122,7 +122,7 @@ impl Context { pub(crate) fn new( propose_id: ProposeId, first_incomplete: u64, - cluster_state: ClusterStateReady, + cluster_state: ClusterStateFull, ) -> Self { Self { propose_id, @@ -142,7 +142,7 @@ impl Context { } /// Returns the current client id - pub(crate) fn cluster_state(&self) -> ClusterStateReady { + pub(crate) fn cluster_state(&self) -> ClusterStateFull { self.cluster_state.clone() } } @@ -205,27 +205,27 @@ impl ClusterStateShared { /// Fetch and updates current state /// /// Returns the fetched cluster state - pub(crate) async fn fetch_and_update(&self) -> Result { + pub(crate) async fn fetch_and_update(&self) -> Result { let current = self.inner.read().clone(); let (new_state, _) = self.fetch.fetch_cluster(current).await?; - *self.inner.write() = ClusterState::Ready(new_state.clone()); + *self.inner.write() = ClusterState::Full(new_state.clone()); debug!("cluster state updates to: {new_state:?}"); Ok(new_state) } /// Retrieves the cluster state if it's ready, or fetches and updates it if not. 
- pub(crate) async fn ready_or_fetch(&self) -> Result { + pub(crate) async fn ready_or_fetch(&self) -> Result { let current = self.inner.read().clone(); match current { ClusterState::Init(init) => self.fetch_and_update().await, - ClusterState::Ready(ready) => Ok(ready), + ClusterState::Full(ready) => Ok(ready), } } /// Updates the current state with the provided `ClusterStateReady`. - pub(crate) fn update_with(&self, cluster_state: ClusterStateReady) { - *self.inner.write() = ClusterState::Ready(cluster_state); + pub(crate) fn update_with(&self, cluster_state: ClusterStateFull) { + *self.inner.write() = ClusterState::Full(cluster_state); } } @@ -343,7 +343,7 @@ where async fn handle_err( &self, err: &CurpError, - cluster_state: ClusterStateReady, + cluster_state: ClusterStateFull, ) -> Result<(), tonic::Status> { match *err { // some errors that should not retry @@ -432,7 +432,7 @@ where linearizable: bool, ) -> Result { self.retry::<_, _>(|client, ctx| async move { - let (_, resp) = self.fetch.fetch_cluster(ClusterState::Ready(ctx.cluster_state())).await?; + let (_, resp) = self.fetch.fetch_cluster(ClusterState::Full(ctx.cluster_state())).await?; Ok(resp) }) .await diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index f73653c1b..4e4cca3e2 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -14,7 +14,7 @@ use utils::ClientTlsConfig; use super::{cluster_state::ClusterState, config::Config, unary::Unary}; use crate::{ client::{ - cluster_state::ClusterStateReady, + cluster_state::ClusterStateFull, connect::RepeatableClientApi, fetch::Fetch, keep_alive::KeepAlive, @@ -151,7 +151,7 @@ async fn test_unary_propose_fast_path_works() { }); }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); + let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); let ctx = Context::new(ProposeId::default(), 0, 
cluster_state); let res = unary .propose(&TestCommand::new_put(vec![1], 1), None, true, ctx) @@ -187,7 +187,7 @@ async fn test_unary_propose_slow_path_works() { }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); + let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); let ctx = Context::new(ProposeId::default(), 0, cluster_state); let start_at = Instant::now(); let res = unary @@ -233,7 +233,7 @@ async fn test_unary_propose_fast_path_fallback_slow_path() { }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); + let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); let ctx = Context::new(ProposeId::default(), 0, cluster_state); let start_at = Instant::now(); let res = unary @@ -281,7 +281,7 @@ async fn test_unary_propose_return_early_err() { }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); + let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); let ctx = Context::new(ProposeId::default(), 0, cluster_state); let err = unary .propose(&TestCommand::new_put(vec![1], 1), None, true, ctx) @@ -320,13 +320,13 @@ async fn test_retry_propose_return_no_retry_error() { }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); + let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(100), 5), KeepAlive::new(Duration::from_secs(1)), Fetch::new_disable(), - ClusterState::Ready(cluster_state), + ClusterState::Full(cluster_state), ); let err = retry .propose(&TestCommand::new_put(vec![1], 1), None, false) @@ -362,13 +362,13 @@ async fn 
test_retry_propose_return_retry_error() { }); let unary = init_unary_client(None, None); let cluster_state = - ClusterStateReady::new(0, 1, connects.clone(), build_default_membership()); + ClusterStateFull::new(0, 1, connects.clone(), build_default_membership()); let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(10), 5), KeepAlive::new(Duration::from_secs(1)), Fetch::new(Duration::from_secs(1), move |_| connects.clone()), - ClusterState::Ready(cluster_state), + ClusterState::Full(cluster_state), ); let _err = retry .propose(&TestCommand::new_put(vec![1], 1), None, false) @@ -404,7 +404,7 @@ async fn test_read_index_success() { }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); + let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); let ctx = Context::new(ProposeId::default(), 0, cluster_state); let res = unary .propose(&TestCommand::default(), None, true, ctx) @@ -439,7 +439,7 @@ async fn test_read_index_fail() { }); }); let unary = init_unary_client(None, None); - let cluster_state = ClusterStateReady::new(0, 1, connects, build_default_membership()); + let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); let ctx = Context::new(ProposeId::default(), 0, cluster_state); let res = unary .propose(&TestCommand::default(), None, true, ctx) From 4ca4057d858bd8e86520c95e56f9a507998dcb96 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:48:59 +0800 Subject: [PATCH 244/322] test: add tests for ClusterState Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 205 ++++++++++++++++++++++++ crates/curp/src/client/tests.rs | 2 +- 2 files changed, 206 insertions(+), 1 deletion(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 
18ce95e84..b322979df 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -251,3 +251,208 @@ impl ClusterStateFull { self.membership.version() } } + +#[cfg(test)] +mod test { + use std::time::Duration; + + use curp_test_utils::test_cmd::TestCommand; + use tonic::Response; + use tracing_test::traced_test; + + use crate::{ + client::tests::{build_default_membership, init_mocked_connects}, + rpc::{NodeMetadata, ProposeId, RecordRequest, RecordResponse}, + }; + + use super::*; + + #[traced_test] + #[tokio::test] + async fn test_cluster_state_full_map_leader_ok() { + let connects = init_mocked_connects(5, |id, conn| { + match id { + 0 => { + conn.expect_record().returning(|_req, _timeout| { + Ok(Response::new(RecordResponse { conflict: true })) + }); + } + 1 | 2 | 3 | 4 => { + conn.expect_record().returning(|_req, _timeout| { + Ok(Response::new(RecordResponse { conflict: false })) + }); + } + _ => unreachable!("there are only 5 nodes"), + }; + }); + let req = RecordRequest::new(ProposeId::default(), &TestCommand::default()); + let membership = build_default_membership(); + let state = ClusterStateFull::new(0, 1, connects, membership); + let conflict = state + .map_leader(move |conn| async move { conn.record(req, Duration::from_secs(1)).await }) + .await + .unwrap() + .into_inner() + .conflict; + + assert!(conflict); + } + + #[traced_test] + #[tokio::test] + async fn test_cluster_state_full_map_server_ok() { + let connects = init_mocked_connects(5, |id, conn| { + match id { + 2 => { + conn.expect_record().returning(|_req, _timeout| { + Ok(Response::new(RecordResponse { conflict: true })) + }); + } + 0 | 1 | 3 | 4 => { + conn.expect_record().returning(|_req, _timeout| { + Ok(Response::new(RecordResponse { conflict: false })) + }); + } + _ => unreachable!("there are only 5 nodes"), + }; + }); + let req = RecordRequest::new(ProposeId::default(), &TestCommand::default()); + let membership = build_default_membership(); + let 
state = ClusterStateFull::new(0, 1, connects, membership); + let conflict = state + .map_server(2, move |conn| async move { + conn.record(req, Duration::from_secs(1)).await + }) + .unwrap() + .await + .unwrap() + .into_inner() + .conflict; + + assert!(conflict); + } + + #[traced_test] + #[tokio::test] + async fn test_cluster_state_full_for_each_follower_ok() { + let connects = init_mocked_connects(5, |id, conn| { + match id { + 0 => { + conn.expect_record().returning(|_req, _timeout| { + Ok(Response::new(RecordResponse { conflict: true })) + }); + } + 1 | 2 | 3 | 4 => { + conn.expect_record().returning(|_req, _timeout| { + Ok(Response::new(RecordResponse { conflict: false })) + }); + } + _ => unreachable!("there are only 5 nodes"), + }; + }); + let req = RecordRequest::new(ProposeId::default(), &TestCommand::default()); + let membership = build_default_membership(); + let state = ClusterStateFull::new(0, 1, connects, membership); + let conflicts: Vec<_> = state + .for_each_follower({ + move |conn| { + let req = req.clone(); + async move { conn.record(req, Duration::from_secs(1)).await } + } + }) + .collect::>() + .await + .into_iter() + .map(|r| r.unwrap().into_inner().conflict) + .collect(); + + assert_eq!(conflicts.len(), 4); + assert!(conflicts.into_iter().all(|c| !c)); + } + + #[traced_test] + #[tokio::test] + async fn test_cluster_state_full_for_each_follower_with_quorum_ok() { + let connects = init_mocked_connects(5, |id, conn| { + match id { + 0 | 1 => { + conn.expect_record().returning(|_req, _timeout| { + Ok(Response::new(RecordResponse { conflict: false })) + }); + } + 2 | 3 | 4 => { + conn.expect_record().returning(|_req, _timeout| { + Ok(Response::new(RecordResponse { conflict: true })) + }); + } + _ => unreachable!("there are only 5 nodes"), + }; + }); + let req = RecordRequest::new(ProposeId::default(), &TestCommand::default()); + let membership = build_default_membership(); + let state = ClusterStateFull::new(0, 1, connects, membership); + let record 
= move |conn: Arc| { + let req = req.clone(); + async move { conn.record(req, Duration::from_secs(1)).await } + }; + + let ok = state + .for_each_follower_with_quorum( + record, + |res| res.is_ok_and(|resp| resp.get_ref().conflict), + |qs, ids| QuorumSet::is_quorum(qs, ids), + ) + .await; + + assert!(ok); + } + + #[traced_test] + #[tokio::test] + async fn test_cluster_state_full_for_each_server_ok() { + let connects = init_mocked_connects(5, |id, conn| { + match id { + 0 | 1 | 2 | 3 | 4 => { + conn.expect_record().returning(|_req, _timeout| { + Ok(Response::new(RecordResponse { conflict: false })) + }); + } + _ => unreachable!("there are only 5 nodes"), + }; + }); + let req = RecordRequest::new(ProposeId::default(), &TestCommand::default()); + let membership = build_default_membership(); + let state = ClusterStateFull::new(0, 1, connects, membership); + let record = move |conn: Arc| { + let req = req.clone(); + async move { conn.record(req, Duration::from_secs(1)).await } + }; + + let conflicts: Vec<_> = state.for_each_server(record).collect().await; + assert_eq!(conflicts.len(), 5); + } + + #[traced_test] + #[tokio::test] + async fn test_cluster_state_init_for_each_server_ok() { + let connects = init_mocked_connects(5, |id, conn| { + match id { + 0 | 1 | 2 | 3 | 4 => { + conn.expect_record().returning(|_req, _timeout| { + Ok(Response::new(RecordResponse { conflict: false })) + }); + } + _ => unreachable!("there are only 5 nodes"), + }; + }); + let req = RecordRequest::new(ProposeId::default(), &TestCommand::default()); + let state = ClusterStateInit::new(connects.into_values().collect()); + let record = move |conn: Arc| { + let req = req.clone(); + async move { conn.record(req, Duration::from_secs(1)).await } + }; + + let conflicts: Vec<_> = state.for_each_server(record).collect().await; + assert_eq!(conflicts.len(), 5); + } +} diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 4e4cca3e2..643bbb3f2 100644 --- 
a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -85,7 +85,7 @@ fn build_empty_response() -> OpResponse { OpResponse { op: None } } -fn build_default_membership() -> Membership { +pub(super) fn build_default_membership() -> Membership { let members = (0..5).collect::>(); let nodes = members .iter() From 535344cb2ecaff3622717821157077e4fd0a4569 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 11 Oct 2024 11:06:13 +0800 Subject: [PATCH 245/322] test: add tests for KeepAlive Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/keep_alive.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 1e35085f1..8b40c501d 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -367,4 +367,17 @@ mod tests { .unwrap_err(); assert_eq!(keep_alive.wait_id_update(0).await, 10); } + + #[traced_test] + #[tokio::test] + async fn test_stream_client_keep_alive_on_cluster_shutdown() { + let connects = init_mocked_stream_connects(5, 0, 2, move |client_id| { + Box::pin(async move { Err(CurpError::ShuttingDown(())) }) + }); + let mut keep_alive = init_stream_client(connects, 1, 1); + /// handle should exit on shutdown + tokio::time::timeout(Duration::from_millis(10), &mut keep_alive.handle) + .await + .unwrap(); + } } From 4d33f77e5dc32c4128da1e8b8d937fc9763dfc7d Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:14:06 +0800 Subject: [PATCH 246/322] test: add tests for Retry Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 6 ++ crates/curp/src/client/retry.rs | 17 +++++ crates/curp/src/client/tests.rs | 84 ++++++++++++++++++++++--- 3 files changed, 98 insertions(+), 9 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs 
b/crates/curp/src/client/cluster_state.rs index b322979df..48c24e855 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -250,6 +250,12 @@ impl ClusterStateFull { pub(crate) fn cluster_version(&self) -> u64 { self.membership.version() } + + /// Returns the membership of the state + #[cfg(test)] + pub(crate) fn membership(&self) -> &Membership { + &self.membership + } } #[cfg(test)] diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 7f804782b..a4d08baec 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -227,6 +227,17 @@ impl ClusterStateShared { pub(crate) fn update_with(&self, cluster_state: ClusterStateFull) { *self.inner.write() = ClusterState::Full(cluster_state); } + + /// Retrieves the cluster state + #[cfg(test)] + pub(crate) fn unwrap_full_state(&self) -> ClusterStateFull { + let current = self.inner.read().clone(); + match current { + ClusterState::Init(_) => unreachable!("initial state"), + ClusterState::Full(ready) => ready, + } + } + } /// The retry client automatically retry the requests of the inner client api @@ -376,6 +387,12 @@ where Ok(()) } + + /// Returns the shared cluster state + #[cfg(test)] + pub(crate) fn cluster_state(&self) -> &ClusterStateShared { + &self.cluster_state + } } #[async_trait] diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 643bbb3f2..0e25c146a 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -26,8 +26,8 @@ use crate::{ rpc::{ self, connect::{ConnectApi, MockConnectApi}, - CurpError, MembershipResponse, Node, NodeMetadata, OpResponse, ProposeId, ProposeResponse, - ReadIndexResponse, RecordResponse, ResponseOp, SyncedResponse, + Change, CurpError, MembershipResponse, Node, NodeMetadata, OpResponse, ProposeId, + ProposeResponse, ReadIndexResponse, RecordResponse, ResponseOp, SyncedResponse, }, }; @@ -89,12 +89,7 @@ pub(super) fn 
build_default_membership() -> Membership { let members = (0..5).collect::>(); let nodes = members .iter() - .map(|id| { - ( - *id, - NodeMetadata::new(format!("{id}"), vec!["addr"], vec!["addr"]), - ) - }) + .map(|id| (*id, NodeMetadata::default())) .collect(); Membership::new(vec![members], nodes) } @@ -103,6 +98,7 @@ fn build_membership_resp( leader_id: Option, term: u64, members: impl IntoIterator, + learners: impl IntoIterator, ) -> Result, CurpError> { let leader_id = leader_id.ok_or(CurpError::leader_transfer("no current leader"))?; @@ -110,6 +106,7 @@ fn build_membership_resp( let nodes: Vec = members .clone() .into_iter() + .chain(learners) .map(|node_id| Node { node_id, meta: Some(NodeMetadata::default()), @@ -347,7 +344,7 @@ async fn test_retry_propose_return_retry_error() { let connects = init_mocked_connects(5, |id, conn| { conn.expect_fetch_membership() .returning(move |_req, _timeout| { - build_membership_resp(Some(0), 2, vec![0, 1, 2, 3, 4]) + build_membership_resp(Some(0), 2, vec![0, 1, 2, 3, 4], []) }); if id == 0 { let err = early_err.clone(); @@ -377,6 +374,75 @@ async fn test_retry_propose_return_retry_error() { } } +#[traced_test] +#[tokio::test] +async fn test_retry_will_update_state_on_error() { + let connects = init_mocked_connects(5, |_id, conn| { + conn.expect_propose_stream() + .returning(move |_req, _token, _timeout| Err(CurpError::wrong_cluster_version())); + + conn.expect_fetch_membership() + .returning(move |_req, _timeout| { + build_membership_resp(Some(0), 1, vec![0, 1, 2, 3], [4]) + }); + }); + let unary = init_unary_client(None, None); + let cluster_state = ClusterStateFull::new(0, 1, connects.clone(), build_default_membership()); + let retry = Retry::new( + unary, + RetryConfig::new_fixed(Duration::from_millis(10), 5), + KeepAlive::new(Duration::from_secs(1)), + Fetch::new(Duration::from_secs(1), move |_| connects.clone()), + ClusterState::Full(cluster_state), + ); + let _err = retry + .propose(&TestCommand::new_put(vec![1], 1), 
None, false) + .await + .unwrap_err(); + + // The state should update to the new membership + let state = retry.cluster_state().unwrap_full_state(); + let members = (0..4).collect::>(); + let nodes = (0..5).map(|id| (id, NodeMetadata::default())).collect(); + let expect_membership = Membership::new(vec![members], nodes); + assert_eq!(*state.membership(), expect_membership); +} + +#[traced_test] +#[tokio::test] +async fn test_retry_will_update_state_on_change_membership() { + let connects = init_mocked_connects(5, |_id, conn| { + conn.expect_fetch_membership() + .returning(move |_req, _timeout| { + build_membership_resp(Some(0), 2, vec![0, 1, 2, 3, 4], []) + }); + conn.expect_change_membership() + .returning(move |_req, _timeout| { + build_membership_resp(Some(0), 2, vec![0, 1, 2, 3], [4]) + }); + }); + let unary = init_unary_client(None, None); + let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); + let retry = Retry::new( + unary, + RetryConfig::new_fixed(Duration::from_millis(10), 5), + KeepAlive::new(Duration::from_secs(1)), + Fetch::new_disable(), + ClusterState::Full(cluster_state), + ); + + retry + .change_membership(vec![Change::Demote(4)]) + .await + .unwrap(); + // The state should update to the changed membership + let state = retry.cluster_state().unwrap_full_state(); + let members = (0..4).collect::>(); + let nodes = (0..5).map(|id| (id, NodeMetadata::default())).collect(); + let expect_membership = Membership::new(vec![members], nodes); + assert_eq!(*state.membership(), expect_membership); +} + #[traced_test] #[tokio::test] async fn test_read_index_success() { From 66260724f29599ec69403a7cd4676f90c78b1f12 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 11 Oct 2024 17:16:35 +0800 Subject: [PATCH 247/322] test: add tests for NodeStates Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/node_state.rs | 87 +++++++++++++++++++ 1 file 
changed, 87 insertions(+) diff --git a/crates/curp/src/server/raw_curp/node_state.rs b/crates/curp/src/server/raw_curp/node_state.rs index bfe6ff456..ce172bdcd 100644 --- a/crates/curp/src/server/raw_curp/node_state.rs +++ b/crates/curp/src/server/raw_curp/node_state.rs @@ -230,3 +230,90 @@ impl NodeState { (connect, sync_event, remove_event) } } +#[cfg(test)] +mod tests { + use utils::parking_lot_lock::RwLockMap; + + use super::*; + use crate::rpc::connect::{InnerConnectApiWrapper, MockInnerConnectApi}; + use std::sync::Arc; + + fn build_new_connect(id: u64) -> InnerConnectApiWrapper { + let mut connect = MockInnerConnectApi::new(); + connect.expect_id().returning(move || id); + InnerConnectApiWrapper::new_from_arc(Arc::new(connect)) + } + + fn build_initial_node_states() -> NodeStates { + let init = (0..3).map(|id| (id, build_new_connect(id))); + let node_states = NodeStates::new_from_connects(init); + let ids: Vec<_> = node_states.states.map_read(|s| s.keys().copied().collect()); + assert_eq!(ids, [0, 1, 2]); + node_states + } + + #[test] + fn test_node_state_update_case0() { + let node_states = build_initial_node_states(); + node_states.update_match_index(2, 1); + node_states.update_next_index(2, 2); + + // adds some nodes + let new_connects = (0..5).map(|id| (id, build_new_connect(id))).collect(); + let new_states = node_states.update_with(new_connects); + assert_eq!(new_states.keys().copied().collect::>(), [3, 4]); + + let ids: Vec<_> = node_states.states.map_read(|s| s.keys().copied().collect()); + assert_eq!(ids, [0, 1, 2, 3, 4]); + // makes sure that index won't be overridden + assert_eq!(node_states.get_match_index(2), Some(1)); + assert_eq!(node_states.get_next_index(2), Some(2)); + } + + #[test] + fn test_node_state_update_case1() { + let node_states = build_initial_node_states(); + + // remove some nodes + let new_connects = (0..2).map(|id| (id, build_new_connect(id))).collect(); + let new_states = node_states.update_with(new_connects); + 
assert_eq!(new_states.keys().count(), 0); + + let ids: Vec<_> = node_states.states.map_read(|s| s.keys().copied().collect()); + assert_eq!(ids, [0, 1]); + } + + #[test] + fn test_update_and_get_indices() { + let node_states = build_initial_node_states(); + node_states.update_match_index(0, 1); + node_states.update_match_index(1, 2); + node_states.update_match_index(2, 3); + + node_states.update_next_index(0, 1); + node_states.update_next_index(1, 2); + node_states.update_next_index(2, 3); + + assert_eq!(node_states.get_match_index(0), Some(1)); + assert_eq!(node_states.get_match_index(1), Some(2)); + assert_eq!(node_states.get_match_index(2), Some(3)); + + assert_eq!(node_states.get_next_index(0), Some(1)); + assert_eq!(node_states.get_next_index(1), Some(2)); + assert_eq!(node_states.get_next_index(2), Some(3)); + } + + #[test] + fn test_map_status() { + let node_states = build_initial_node_states(); + let ids: Vec<_> = node_states.map_status(|(id, _status)| *id).collect(); + assert_eq!(ids, vec![0, 1, 2]); + } + + #[test] + fn test_get_connects() { + let node_states = build_initial_node_states(); + let ids: Vec<_> = node_states.connects(&[1, 2]).map(|c| c.id()).collect(); + assert_eq!(ids, vec![1, 2]); + } +} From 0b95dc2f0b59aeafaa26dd4c6c95e7ec62beda17 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 11 Oct 2024 17:31:26 +0800 Subject: [PATCH 248/322] test: add tests for membership persistent Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 4 ++-- crates/curp/src/server/storage/db.rs | 32 ++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 761062f07..96eda8c0c 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -113,7 +113,7 @@ impl NodeMembershipState { } /// Membership state stored in current node -#[derive(Clone, Serialize, Deserialize, Debug, Default)] 
+#[derive(Clone, Serialize, Deserialize, Debug, Default, PartialEq, Eq)] pub struct MembershipState { /// Membership entries entries: Vec, @@ -122,7 +122,7 @@ pub struct MembershipState { #[allow(clippy::unwrap_used)] // `entries` should contains at least one entry impl MembershipState { /// Creates a new `MembershipState` - fn new(initial_membership: Membership) -> Self { + pub(crate) fn new(initial_membership: Membership) -> Self { let initial_entry = MembershipEntry::new(0, initial_membership); Self { entries: vec![initial_entry], diff --git a/crates/curp/src/server/storage/db.rs b/crates/curp/src/server/storage/db.rs index fddee997a..267d3c084 100644 --- a/crates/curp/src/server/storage/db.rs +++ b/crates/curp/src/server/storage/db.rs @@ -130,14 +130,21 @@ impl DB { #[cfg(test)] mod tests { - use std::{error::Error, sync::Arc}; + use std::{ + collections::{BTreeMap, BTreeSet}, + error::Error, + sync::Arc, + }; use curp_test_utils::{sleep_secs, test_cmd::TestCommand}; use test_macros::abort_on_panic; use tokio::fs::remove_dir_all; use super::*; - use crate::rpc::ProposeId; + use crate::{ + member::Membership, + rpc::{NodeMetadata, ProposeId}, + }; #[tokio::test] #[abort_on_panic] @@ -173,4 +180,25 @@ mod tests { Ok(()) } + + #[test] + fn put_and_recover_membership() { + let db_dir = tempfile::tempdir().unwrap().into_path(); + let storage_cfg = EngineConfig::RocksDB(db_dir.clone()); + let membership = Membership::new( + vec![BTreeSet::from([1])], + BTreeMap::from([(1, NodeMetadata::default())]), + ); + let ms = MembershipState::new(membership); + { + let s = DB::::open(&storage_cfg).unwrap(); + s.put_membership(1, &ms).unwrap(); + } + { + let s = DB::::open(&storage_cfg).unwrap(); + let (id, ms_recovered) = s.recover_membership().unwrap().unwrap(); + assert_eq!(id, 1); + assert_eq!(ms, ms_recovered); + } + } } From c5ea1ba0cfbb0acd997970385ebbbe217fd6739c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 11 Oct 2024 
17:50:44 +0800 Subject: [PATCH 249/322] refactor: implement membership recovery Adds a new `MembershipConfig` enum. The membership states will be recovered during CurpNode initialization. Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 46 ++++++++++++++++++++---- crates/curp/src/server/curp_node/mod.rs | 14 +++++--- crates/curp/src/server/raw_curp/mod.rs | 6 ++-- crates/curp/src/server/raw_curp/tests.rs | 8 +++-- 4 files changed, 57 insertions(+), 17 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 96eda8c0c..54d196660 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -14,6 +14,32 @@ use crate::quorum::QuorumSet; use crate::rpc::Change; use crate::rpc::NodeMetadata; +/// Represents the configuration of a membership. +#[derive(Debug, Clone)] +#[non_exhaustive] +pub enum MembershipConfig { + /// Initial membership information. + Init(MembershipInfo), + /// Recovered membership state. 
+ Recovered((u64, MembershipState)), +} + +impl MembershipConfig { + /// Returns all members in this config + #[inline] + #[must_use] + pub fn members(&self) -> BTreeMap { + match *self { + MembershipConfig::Init(ref conf) => conf.init_members.clone(), + MembershipConfig::Recovered((_, ref conf)) => conf + .effective() + .members() + .map(|(id, meta)| (id, meta.clone())) + .collect(), + } + } +} + /// The membership info, used to build the initial states #[derive(Debug, Clone)] #[non_exhaustive] @@ -58,12 +84,20 @@ pub(crate) struct NodeMembershipState { impl NodeMembershipState { /// Creates a new `NodeMembershipState` with initial state - pub(crate) fn new(info: MembershipInfo) -> Self { - let node_id = info.node_id; - let cluster_state = MembershipState::new(info.into_membership()); - Self { - node_id, - cluster_state, + pub(crate) fn new(config: MembershipConfig) -> Self { + match config { + MembershipConfig::Init(info) => { + let node_id = info.node_id; + let cluster_state = MembershipState::new(info.into_membership()); + Self { + node_id, + cluster_state, + } + } + MembershipConfig::Recovered((node_id, cluster_state)) => Self { + node_id, + cluster_state, + }, } } diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index b9f963160..f2fede65e 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -37,7 +37,7 @@ use super::{ use crate::{ cmd::{Command, CommandExecutor}, log_entry::{EntryData, LogEntry}, - member::MembershipInfo, + member::{MembershipConfig, MembershipInfo}, response::ResponseSender, role_change::RoleChange, rpc::{ @@ -729,9 +729,13 @@ impl, RC: RoleChange> CurpNode { sps: Vec>, ucps: Vec>, ) -> Result { - let peer_addrs: HashMap<_, _> = membership_info - .init_members - .clone() + let ms = storage.recover_membership()?; + let membership_config = ms.map_or( + MembershipConfig::Init(membership_info), + MembershipConfig::Recovered, + ); + let peer_addrs: 
HashMap<_, _> = membership_config + .members() .into_iter() .map(|(id, meta)| (id, meta.into_peer_urls())) .collect(); @@ -765,7 +769,7 @@ impl, RC: RoleChange> CurpNode { .as_tx(as_tx.clone()) .resp_txs(Arc::new(Mutex::default())) .id_barrier(Arc::new(IdBarrier::new())) - .membership_info(membership_info) + .membership_config(membership_config) .member_connects(member_connects) .build_raw_curp() .map_err(|e| CurpError::internal(format!("build raw curp failed, {e}")))?, diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index e06eeff88..f5226593d 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -64,7 +64,7 @@ use crate::cmd::Command; use crate::log_entry::EntryData; use crate::log_entry::LogEntry; use crate::member::Membership; -use crate::member::MembershipInfo; +use crate::member::MembershipConfig; use crate::member::NodeMembershipState; use crate::members::ServerId; use crate::quorum::QuorumSet; @@ -123,7 +123,7 @@ pub struct RawCurp { #[builder(name = "RawCurpBuilder")] pub(super) struct RawCurpArgs { /// Membership information - membership_info: MembershipInfo, + membership_config: MembershipConfig, /// Member connects member_connects: BTreeMap, /// Current node is leader or not @@ -207,7 +207,7 @@ impl RawCurpBuilder { log, ctx, task_manager: args.task_manager, - ms: RwLock::new(NodeMembershipState::new(args.membership_info)), + ms: RwLock::new(NodeMembershipState::new(args.membership_config)), }; if args.is_leader { diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 0df65bea2..fd6bb0a04 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -9,6 +9,7 @@ use utils::config::{ use super::*; use crate::{ + member::MembershipInfo, rpc::{self, Change, Node, NodeMetadata, Redirect}, server::{ cmd_board::CommandBoard, @@ -65,8 +66,9 @@ impl RawCurp { .map(|id| (id, 
NodeMetadata::new(format!("S{id}"), ["addr"], ["addr"]))) .collect(); let membership_info = MembershipInfo::new(0, init_members); - let peer_addrs: HashMap<_, _> = membership_info - .init_members + let membership_config = MembershipConfig::Init(membership_info); + let peer_addrs: HashMap<_, _> = membership_config + .members() .clone() .into_iter() .map(|(id, meta)| (id, meta.into_peer_urls())) @@ -86,7 +88,7 @@ impl RawCurp { .as_tx(as_tx) .resp_txs(resp_txs) .id_barrier(id_barrier) - .membership_info(membership_info) + .membership_config(membership_config) .member_connects(member_connects) .build_raw_curp() .unwrap() From ac5fdc7eb67a4e647f30f23d463bb3b45ddf885a Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 11 Oct 2024 22:22:35 +0800 Subject: [PATCH 250/322] test: add tests for raw_curp::member_impl Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/raw_curp/member_impl.rs | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 0e8881b3d..ac59d9657 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -107,3 +107,65 @@ impl RawCurp { } } } + +#[cfg(test)] +mod test { + use std::sync::Arc; + + use curp_test_utils::mock_role_change; + use utils::task_manager::TaskManager; + + use crate::rpc::NodeMetadata; + + use super::*; + + #[test] + fn test_update_membership_state_ok() { + let curp = RawCurp::new_test(3, mock_role_change(), Arc::new(TaskManager::new())); + let membership1 = Membership::new( + vec![(0..4).collect()], + (0..4).map(|id| (id, NodeMetadata::default())).collect(), + ); + let membership2 = Membership::new( + vec![(0..5).collect()], + (0..5).map(|id| (id, NodeMetadata::default())).collect(), + ); + + curp.update_membership_state(None, [(1, membership1.clone())], None); + 
assert_eq!(*curp.ms.read().cluster().effective(), membership1); + curp.update_membership_state(None, [(2, membership2.clone())], None); + assert_eq!(*curp.ms.read().cluster().effective(), membership2); + curp.update_membership_state(Some(1), [], None); + assert_eq!(*curp.ms.read().cluster().effective(), membership1); + + curp.update_membership_state(None, [(2, membership2.clone())], None); + curp.update_membership_state(None, [], Some(2)); + assert_eq!(*curp.ms.read().cluster().effective(), membership2); + assert_eq!(*curp.ms.read().cluster().committed(), membership2); + } + + #[test] + fn test_update_role_ok() { + let curp = RawCurp::new_test(3, mock_role_change(), Arc::new(TaskManager::new())); + assert_eq!(curp.st.read().role, Role::Leader); + // self is 0 + let membership1 = Membership::new( + vec![(1..3).collect()], + (1..3).map(|id| (id, NodeMetadata::default())).collect(), + ); + let membership2 = Membership::new( + vec![(0..3).collect()], + (0..3).map(|id| (id, NodeMetadata::default())).collect(), + ); + + // remove from membership + curp.update_membership_state(None, [(1, membership1.clone())], None); + curp.update_role(); + assert_eq!(curp.st.read().role, Role::Learner); + + // add back + curp.update_membership_state(None, [(2, membership2.clone())], None); + curp.update_role(); + assert_eq!(curp.st.read().role, Role::Follower); + } +} From 3c3357b1707bd3efe2009fdad1dea7d2880940ab Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 11 Oct 2024 23:55:29 +0800 Subject: [PATCH 251/322] test: add tests for CurpNode Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/member_impl.rs | 222 +++++++++++++++++- crates/curp/src/server/curp_node/mod.rs | 32 ++- .../curp/src/server/raw_curp/member_impl.rs | 12 + crates/curp/src/server/raw_curp/mod.rs | 12 + crates/curp/src/server/raw_curp/node_state.rs | 6 + crates/utils/src/barrier/id.rs | 10 + crates/utils/src/task_manager/mod.rs | 8 
+ 7 files changed, 296 insertions(+), 6 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 8efbcc06f..58567e464 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -37,11 +37,20 @@ impl, RC: RoleChange> CurpNode { &self, request: ChangeMembershipRequest, ) -> Result { - self.ensure_leader()?; let changes = request .changes .into_iter() .map(MembershipChange::into_inner); + + self.change_membership_inner(changes).await + } + + /// Performs a membership change to the cluster + pub(crate) async fn change_membership_inner( + &self, + changes: impl IntoIterator, + ) -> Result { + self.ensure_leader()?; let changes = Self::ensure_non_overlapping(changes)?; let configs = self.curp.generate_membership(changes); if configs.is_empty() { @@ -112,7 +121,7 @@ impl, RC: RoleChange> CurpNode { return Ok(changes); } - Err(CurpError::InvalidConfig(())) + Err(CurpError::InvalidMemberChange(())) } /// Ensures that the current node is the leader @@ -189,3 +198,212 @@ impl, RC: RoleChange> CurpNode { } } } + +#[cfg(test)] +mod test { + use std::time::Duration; + + use curp_test_utils::{ + mock_role_change, + test_cmd::{TestCE, TestCommand}, + TestRoleChange, + }; + use engine::MemorySnapshotAllocator; + use parking_lot::RwLock; + use tokio::sync::mpsc; + use tracing_test::traced_test; + use utils::{config::EngineConfig, task_manager::TaskManager}; + + use crate::{ + rpc::NodeMetadata, + server::{cmd_board::CommandBoard, RawCurp, StorageApi, DB}, + }; + + use super::*; + + fn build_curp_node() -> CurpNode { + let curp = Arc::new(RawCurp::new_test( + 3, + mock_role_change(), + Arc::new(TaskManager::new()), + )); + let db_dir = tempfile::tempdir().unwrap().into_path(); + let storage_cfg = EngineConfig::RocksDB(db_dir.clone()); + let db = DB::::open(&storage_cfg).unwrap(); + let (exe_tx, _exe_rx) = mpsc::unbounded_channel(); + let (tas_tx, 
_tas_rx) = mpsc::unbounded_channel(); + let (as_tx, _as_rx) = flume::unbounded(); + let (propose_tx, _propose_rx) = flume::unbounded(); + let ce = TestCE::new("testce".to_owned(), exe_tx, tas_tx, storage_cfg); + let _ignore = db.recover().unwrap(); + + CurpNode { + curp: Arc::clone(&curp), + cmd_board: Arc::new(RwLock::new(CommandBoard::new())), + storage: Arc::new(db), + snapshot_allocator: Box::new(MemorySnapshotAllocator::default()), + cmd_executor: Arc::new(ce), + as_tx, + propose_tx, + } + } + + #[traced_test] + #[tokio::test] + async fn test_handle_append_entries_will_update_membership() { + let curp_node = build_curp_node(); + let curp = Arc::clone(&curp_node.curp); + let init_membership = Membership::new( + vec![(0..3).collect()], + (0..3) + .map(|i| (i, NodeMetadata::new(format!("S{i}"), ["addr"], ["addr"]))) + .collect(), + ); + + let membership = Membership::new( + vec![(0..4).collect()], + (0..4) + .map(|i| (i, NodeMetadata::new(format!("S{i}"), ["addr"], ["addr"]))) + .collect(), + ); + let entry_data = EntryData::Member(membership.clone()); + let entry = LogEntry::new(1, 0, ProposeId::default(), entry_data); + + let resp = curp_node + .append_entries_inner(vec![entry.clone()], 0, 1, 0, 0, 0) + .unwrap(); + assert!(resp.success); + + // append entries should update effective membership + assert_eq!(curp.effective_membership(), membership); + // append entries should update node states + assert!(curp.node_states().contains_key(&3)); + // append entries should spawn new sync task + assert_eq!( + curp.task_manager() + .num_handles(TaskName::SyncFollower) + .unwrap(), + 1 + ); + // append entries should update the in-memory log structure + assert_eq!(*curp.get_log_from(1)[0].as_ref(), entry); + // append entries should persistent the membership + let (id, ms) = curp.persisted_membership().unwrap(); + assert_eq!(id, 0); + assert_eq!(*ms.effective(), membership); + assert_eq!(*ms.committed(), init_membership); + } + + fn commit_memberhip(curp: &RawCurp, 
index: u64) { + // for follower [1, 2] + for id in 1..3 { + assert!(curp + .handle_append_entries_resp(id, Some(index), 1, true, index + 1) + .unwrap()); + } + curp.trigger_all(); + } + + async fn change_membership( + curp_node: Arc>, + change: Change, + ) -> u64 { + let curp = Arc::clone(&curp_node.curp); + let mut commit_index = curp.last_log_index(); + let mut handle = + tokio::spawn(async move { curp_node.change_membership_inner([change]).await }); + // change membership should wait before commit + while { + tokio::time::timeout(Duration::from_millis(100), &mut handle) + .await + .is_err() + } { + commit_index += 1; + commit_memberhip(&curp, commit_index); + } + + commit_index + } + + //#[traced_test] + #[tokio::test] + async fn test_change_membership_will_update_membership() { + let curp_node = Arc::new(build_curp_node()); + let curp = Arc::clone(&curp_node.curp); + let init_membership = Membership::new( + vec![(0..3).collect()], + (0..3) + .map(|i| (i, NodeMetadata::new(format!("S{i}"), ["addr"], ["addr"]))) + .collect(), + ); + let change1 = Change::Add(rpc::Node::new( + 3, + NodeMetadata::new("S3".to_owned(), ["addr"], ["addr"]), + )); + let membership1 = Membership::new( + vec![(0..3).collect()], + (0..4) + .map(|i| (i, NodeMetadata::new(format!("S{i}"), ["addr"], ["addr"]))) + .collect(), + ); + let last_index = change_membership(Arc::clone(&curp_node), change1).await; + // committed one membership log entry + assert_eq!(last_index, 1); + + // append entries should update effective membership + assert_eq!(curp.effective_membership(), membership1.clone()); + // append entries should update node states + assert!(curp.node_states().contains_key(&3)); + // append entries should spawn new sync task + assert_eq!( + curp.task_manager() + .num_handles(TaskName::SyncFollower) + .unwrap(), + 1 + ); + // append entries should update the in-memory log structure + let EntryData::Member(entry) = curp.get_log_from(1)[0].as_ref().entry_data.clone() else { + 
unreachable!() + }; + assert_eq!(entry, membership1); + // append entries should persist the membership + let (id, ms) = curp.persisted_membership().unwrap(); + assert_eq!(id, 0); + assert_eq!(*ms.effective(), membership1); + assert_eq!(*ms.committed(), init_membership); + + // promote the learner added previously + let change2 = Change::Promote(3); + let membership2 = Membership::new( + vec![(0..4).collect()], + (0..4) + .map(|i| (i, NodeMetadata::new(format!("S{i}"), ["addr"], ["addr"]))) + .collect(), + ); + let last_index = change_membership(curp_node, change2).await; + // committed two membership (from 2 to 3) log entries + assert_eq!(last_index, 3); + assert_eq!(curp.effective_membership(), membership2.clone()); + } + + #[traced_test] + #[tokio::test] + async fn test_change_membership_will_reject_duplicate_ids() { + let curp_node = build_curp_node(); + let change1 = Change::Add(rpc::Node::new( + 3, + NodeMetadata::new("S3".to_owned(), ["addr"], ["addr"]), + )); + let change2 = Change::Add(rpc::Node::new( + 3, + NodeMetadata::new("S3".to_owned(), ["addr1"], ["addr1"]), + )); + assert_eq!( + curp_node + .change_membership_inner([change1, change2]) + .await + .unwrap_err(), + CurpError::InvalidMemberChange(()) + ); + } +} diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index f2fede65e..c564ec00c 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -374,13 +374,37 @@ impl, RC: RoleChange> CurpNode { req: &AppendEntriesRequest, ) -> Result { let entries = req.entries()?; + let leader_id = req.leader_id; + let term = req.term; + let prev_log_index = req.prev_log_index; + let prev_log_term = req.prev_log_term; let leader_commit = req.leader_commit; + self.append_entries_inner( + entries, + leader_id, + term, + prev_log_index, + prev_log_term, + leader_commit, + ) + } + + /// Handle `AppendEntries` requests + pub(super) fn append_entries_inner( + &self, + entries: Vec>, 
leader_id: u64, + req_term: u64, + prev_log_index: u64, + prev_log_term: u64, + leader_commit: u64, + ) -> Result { let membership_entries: Vec<_> = Self::filter_membership_entries(&entries).collect(); let result = self.curp.handle_append_entries( - req.term, - req.leader_id, - req.prev_log_index, - req.prev_log_term, + req_term, + leader_id, + prev_log_index, + prev_log_term, entries, leader_commit, ); diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index ac59d9657..165188653 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -106,6 +106,18 @@ impl RawCurp { st_w.leader_id = None; } } + + /// Returns the current membership state + #[cfg(test)] + pub(crate) fn node_states(&self) -> BTreeMap { + self.ctx.node_states.all_states() + } + + /// Return the current persisted membership + #[cfg(test)] + pub(crate) fn persisted_membership(&self) -> Option<(u64, crate::member::MembershipState)> { + self.ctx.curp_storage.recover_membership().unwrap() + } } #[cfg(test)] diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index f5226593d..43c68e31f 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1409,6 +1409,18 @@ impl RawCurp { pub(super) fn client_tls_config(&self) -> Option<&ClientTlsConfig> { self.ctx.client_tls_config.as_ref() } + + /// Get a range of log entry + #[cfg(test)] + pub(crate) fn get_log_from(&self, idx: u64) -> Vec>> { + self.log.read().get_from(idx) + } + + /// Trigger the propose id + #[cfg(test)] + pub(crate) fn trigger_all(&self) { + self.ctx.id_barrier.trigger_all(); + } } // Utils diff --git a/crates/curp/src/server/raw_curp/node_state.rs b/crates/curp/src/server/raw_curp/node_state.rs index ce172bdcd..329a7c2dd 100644 --- a/crates/curp/src/server/raw_curp/node_state.rs +++ b/crates/curp/src/server/raw_curp/node_state.rs @@ -162,6 +162,12 
@@ impl NodeStates { .zip(states_r.values().map(NodeState::connect).cloned()) .collect() } + + /// Get all node states + #[cfg(test)] + pub(super) fn all_states(&self) -> BTreeMap { + self.states.read().clone() + } } /// The state of a node diff --git a/crates/utils/src/barrier/id.rs b/crates/utils/src/barrier/id.rs index 571e82ded..c01201362 100644 --- a/crates/utils/src/barrier/id.rs +++ b/crates/utils/src/barrier/id.rs @@ -55,4 +55,14 @@ where let _ignore = event.notify(usize::MAX); } } + + /// Trigger all barriers, used in tests + #[inline] + #[doc(hidden)] + pub fn trigger_all(&self) { + let barriers = self.barriers.lock(); + for (_id, event) in barriers.iter() { + let _ignore = event.notify(usize::MAX); + } + } } diff --git a/crates/utils/src/task_manager/mod.rs b/crates/utils/src/task_manager/mod.rs index 587613cb7..255c0f2c9 100644 --- a/crates/utils/src/task_manager/mod.rs +++ b/crates/utils/src/task_manager/mod.rs @@ -268,6 +268,14 @@ impl TaskManager { } true } + + /// Get the number of running handles of the give task + #[doc(hidden)] + #[inline] + #[must_use] + pub fn num_handles(&self, name: TaskName) -> Option { + self.tasks.get(&name).map(|t| t.handle.len()) + } } impl Default for TaskManager { From e5e87b56ae09145aab949e1f7103606cccd29c83 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 14 Oct 2024 09:05:33 +0800 Subject: [PATCH 252/322] test: add membership change curp integration tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/tests/it/server.rs | 126 ++++++++++++++++++++++++++++++++- 1 file changed, 125 insertions(+), 1 deletion(-) diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index 92a20ecdb..1f846308b 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -9,7 +9,7 @@ use clippy_utilities::NumericCast; use curp::{ client::{ClientApi, ClientBuilder}, member::MembershipInfo, - rpc::{Change, CurpError, Node, 
NodeMetadata}, + rpc::{Change, CurpError, MembershipResponse, Node, NodeMetadata}, }; use curp_test_utils::{ init_logger, sleep_millis, @@ -20,6 +20,7 @@ use madsim::rand::{thread_rng, Rng}; use test_macros::abort_on_panic; use tokio::net::TcpListener; use tokio_stream::StreamExt; +use tracing_test::traced_test; use utils::config::ClientConfig; use crate::common::curp_group::{ @@ -713,3 +714,126 @@ async fn move_leader_should_move_leadership_to_target_node() { assert_eq!(target, new_leader); assert_ne!(old_leader, new_leader); } + +// A full single step change cycle +#[traced_test] +#[tokio::test(flavor = "multi_thread")] +async fn membership_change_ok_case0() { + let group = CurpGroup::new(3).await; + let client = group.new_client().await; + assert_eq!(client.fetch_cluster(true).await.unwrap().leader_id, 0); + + let change = Change::Add(Node::new(3, NodeMetadata::default())); + client.change_membership(vec![change]).await.unwrap(); + let resp = client.fetch_cluster(true).await.unwrap(); + assert_membership_response(resp, [0, 1, 2], [0, 1, 2, 3]); + + let change = Change::Promote(3); + client.change_membership(vec![change]).await.unwrap(); + let resp = client.fetch_cluster(true).await.unwrap(); + assert_membership_response(resp, [0, 1, 2, 3], [0, 1, 2, 3]); + + let change = Change::Demote(2); + client.change_membership(vec![change]).await.unwrap(); + let resp = client.fetch_cluster(true).await.unwrap(); + assert_membership_response(resp, [0, 1, 3], [0, 1, 2, 3]); + + let change = Change::Remove(2); + client.change_membership(vec![change]).await.unwrap(); + let resp = client.fetch_cluster(true).await.unwrap(); + assert_membership_response(resp, [0, 1, 3], [0, 1, 3]); +} + +// Mixed membership change +#[tokio::test(flavor = "multi_thread")] +async fn membership_change_ok_case1() { + init_logger(); + let mut group = CurpGroup::new(3).await; + let client = group.new_client().await; + assert_eq!(client.fetch_cluster(true).await.unwrap().leader_id, 0); + + let listen3 = 
TcpListener::bind("0.0.0.0:0").await.unwrap(); + let addr3 = listen3.local_addr().unwrap().to_string(); + let listen4 = TcpListener::bind("0.0.0.0:0").await.unwrap(); + let addr4 = listen4.local_addr().unwrap().to_string(); + + let change0 = Change::Add(Node::new(3, NodeMetadata::new("node3", [&addr3], [&addr3]))); + let change1 = Change::Add(Node::new(4, NodeMetadata::new("node4", [&addr4], [&addr4]))); + client + .change_membership(vec![change0, change1]) + .await + .unwrap(); + group + .run_node( + listen3, + "node3".to_owned(), + MembershipInfo::new(3, BTreeMap::default()), + ) + .await; + group + .run_node( + listen4, + "node4".to_owned(), + MembershipInfo::new(3, BTreeMap::default()), + ) + .await; + let resp = client.fetch_cluster(true).await.unwrap(); + assert_membership_response(resp, [0, 1, 2], [0, 1, 2, 3, 4]); + + let change0 = Change::Promote(3); + let change1 = Change::Demote(2); + client + .change_membership(vec![change0, change1]) + .await + .unwrap(); + let resp = client.fetch_cluster(true).await.unwrap(); + assert_membership_response(resp, [0, 1, 3], [0, 1, 2, 3, 4]); + + let change0 = Change::Promote(4); + let change1 = Change::Remove(2); + client + .change_membership(vec![change0, change1]) + .await + .unwrap(); + let resp = client.fetch_cluster(true).await.unwrap(); + assert_membership_response(resp, [0, 1, 3, 4], [0, 1, 3, 4]); +} + +// Remove the leader +#[tokio::test(flavor = "multi_thread")] +async fn membership_change_ok_case2() { + init_logger(); + let group = CurpGroup::new(5).await; + let client = group.new_client().await; + assert_eq!(client.fetch_cluster(true).await.unwrap().leader_id, 0); + + let change = Change::Demote(0); + client.change_membership(vec![change]).await.unwrap(); + let resp = client.fetch_cluster(true).await.unwrap(); + assert_membership_response(resp, [1, 2, 3, 4], [0, 1, 2, 3, 4]); + + let change = Change::Remove(0); + // workaround for clinet id expiry + while client + .change_membership(vec![change.clone()]) + 
.await + .is_err() + {} + let resp = client.fetch_cluster(true).await.unwrap(); + assert_membership_response(resp, [1, 2, 3, 4], [1, 2, 3, 4]); +} + +fn assert_membership_response( + resp: MembershipResponse, + expect_member_ids: impl IntoIterator, + expect_node_ids: impl IntoIterator, +) { + println!("leader: {}", resp.leader_id); + let member_ids: BTreeSet<_> = resp.members.into_iter().flat_map(|s| s.set).collect(); + let expect_member_ids: BTreeSet<_> = expect_member_ids.into_iter().collect(); + assert_eq!(member_ids, expect_member_ids); + + let node_ids: BTreeSet<_> = resp.nodes.into_iter().map(|n| n.node_id).collect(); + let expect_node_ids: BTreeSet<_> = expect_node_ids.into_iter().collect(); + assert_eq!(node_ids, expect_node_ids); +} From 0ecdda7ea01f17b61fd6b509349f67ee2afc8baa Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 14 Oct 2024 10:02:33 +0800 Subject: [PATCH 253/322] test: add membership change xline integration tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/xline/tests/it/cluster_test.rs | 84 +++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/crates/xline/tests/it/cluster_test.rs b/crates/xline/tests/it/cluster_test.rs index 9c435859e..38a3db9e8 100644 --- a/crates/xline/tests/it/cluster_test.rs +++ b/crates/xline/tests/it/cluster_test.rs @@ -1,5 +1,6 @@ use std::{error::Error, time::Duration}; +use etcd_client::Client as EtcdClient; use test_macros::abort_on_panic; use tokio::{net::TcpListener, time::sleep}; use xline_client::{Client, ClientOptions}; @@ -87,3 +88,86 @@ async fn xline_update_node() -> Result<(), Box> { Ok(()) } + +#[tokio::test(flavor = "multi_thread")] +#[abort_on_panic] +async fn xline_remove_node_etcd_client() -> Result<(), Box> { + let mut cluster = Cluster::new(5).await; + cluster.start().await; + let mut cluster_client = EtcdClient::connect(cluster.all_client_addrs(), None) + .await? 
+ .cluster_client(); + let list_res = cluster_client.member_list().await?; + assert_eq!(list_res.members().len(), 5); + let remove_id = list_res.members()[0].id(); + let remove_res = cluster_client.member_remove(remove_id).await?; + assert_eq!(remove_res.members().len(), 4); + assert!(remove_res.members().iter().all(|m| m.id() != remove_id)); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +#[abort_on_panic] +async fn xline_add_node_etcd_client() -> Result<(), Box> { + let mut cluster = Cluster::new(3).await; + cluster.start().await; + let client = EtcdClient::connect(cluster.all_client_addrs(), None).await?; + let mut cluster_client = client.cluster_client(); + let mut kv_client = client.kv_client(); + _ = kv_client.put("key", "value", None).await?; + let new_node_peer_listener = TcpListener::bind("0.0.0.0:0").await?; + let new_node_peer_urls = vec![format!("http://{}", new_node_peer_listener.local_addr()?)]; + let new_node_client_listener = TcpListener::bind("0.0.0.0:0").await?; + let new_node_client_urls = vec![format!("http://{}", new_node_client_listener.local_addr()?)]; + let add_res = cluster_client.member_add(new_node_peer_urls, None).await?; + assert_eq!(add_res.member_list().len(), 4); + cluster + .run_node(new_node_client_listener, new_node_peer_listener) + .await; + let mut etcd_client = etcd_client::Client::connect(&new_node_client_urls, None).await?; + let res = etcd_client.get("key", None).await?; + assert_eq!(res.kvs().get(0).unwrap().value(), b"value"); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +#[abort_on_panic] +async fn xline_update_node_etcd_client() -> Result<(), Box> { + let mut cluster = Cluster::new(3).await; + cluster.start().await; + let mut cluster_client = cluster.client().await.cluster_client(); + let old_list_res = cluster_client.member_list(false).await?; + assert_eq!(old_list_res.members.len(), 3); + let update_id = old_list_res.members[0].id; + let port = old_list_res.members[0] + .peer_ur_ls + .first() + 
.unwrap() + .split(':') + .last() + .unwrap() + .parse::() + .unwrap(); + let update_res = cluster_client + .member_update(update_id, [format!("http://localhost:{}", port)]) + .await?; + assert_eq!(update_res.members.len(), 3); + sleep(Duration::from_secs(3)).await; + let new_list_res = cluster_client.member_list(false).await?; + assert_eq!(new_list_res.members.len(), 3); + let old_addr = &old_list_res + .members + .iter() + .find(|m| m.id == update_id) + .unwrap() + .peer_ur_ls; + let new_addr = &new_list_res + .members + .iter() + .find(|m| m.id == update_id) + .unwrap() + .peer_ur_ls; + assert_ne!(old_addr, new_addr); + + Ok(()) +} From 10f7a860b34d478608d9e04020f795c7fefc043b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sat, 12 Oct 2024 21:57:15 +0800 Subject: [PATCH 254/322] feat: implement wait learner on curp node Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- Cargo.lock | 17 +++++---- crates/curp/Cargo.toml | 2 +- crates/curp/proto/common | 2 +- crates/curp/src/rpc/mod.rs | 2 + .../curp/src/server/curp_node/member_impl.rs | 37 +++++++++++++++++++ crates/curp/src/server/mod.rs | 17 +++++++++ crates/curp/src/server/raw_curp/mod.rs | 36 +++++++++++++++--- crates/curp/src/server/raw_curp/monitor.rs | 30 +++++++++++++++ crates/xline/src/server/auth_wrapper.rs | 10 +++++ 9 files changed, 137 insertions(+), 16 deletions(-) create mode 100644 crates/curp/src/server/raw_curp/monitor.rs diff --git a/Cargo.lock b/Cargo.lock index 3c13d19bb..8a276146a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -950,7 +950,7 @@ dependencies = [ "http", "prost", "tokio", - "tokio-stream 0.1.15", + "tokio-stream 0.1.16", "tonic", "tonic-build", "tower", @@ -1502,7 +1502,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -1978,7 +1978,7 
@@ dependencies = [ "serde_json", "thiserror", "tokio", - "tokio-stream 0.1.15", + "tokio-stream 0.1.16", ] [[package]] @@ -2964,13 +2964,14 @@ dependencies = [ "futures-core", "madsim-tokio", "pin-project-lite", + "tokio-util", ] [[package]] name = "tokio-stream" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" dependencies = [ "futures-core", "pin-project-lite", @@ -3049,7 +3050,7 @@ dependencies = [ "socket2", "tokio", "tokio-rustls", - "tokio-stream 0.1.15", + "tokio-stream 0.1.16", "tower", "tower-layer", "tower-service", @@ -3078,7 +3079,7 @@ dependencies = [ "async-stream", "prost", "tokio", - "tokio-stream 0.1.15", + "tokio-stream 0.1.16", "tonic", ] @@ -3691,7 +3692,7 @@ dependencies = [ "syn 2.0.65", "time", "tokio", - "tokio-stream 0.1.15", + "tokio-stream 0.1.16", "tokio-util", "tonic", "tower", diff --git a/crates/curp/Cargo.toml b/crates/curp/Cargo.toml index 0294aa389..ced3646fc 100644 --- a/crates/curp/Cargo.toml +++ b/crates/curp/Cargo.toml @@ -41,7 +41,7 @@ tokio = { version = "0.2.25", package = "madsim-tokio", features = [ "rt-multi-thread", ] } tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "ab251ad", features = [ - "net", + "net", "sync" ] } tokio-util = "0.7.11" tonic = { version = "0.5.0", package = "madsim-tonic", features = ["tls"] } diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 047ee94f2..87fbd7fd2 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 047ee94f28cb92e7df05dd5c907d9f06620a09ba +Subproject commit 87fbd7fd29335e0799017dde55b93e71dc69da8a diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 93a45df91..39209481c 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -52,6 +52,8 @@ 
pub use self::proto::{ ShutdownRequest, ShutdownResponse, SyncedResponse, + WaitLearnerRequest, + WaitLearnerResponse, WaitSyncedRequest, WaitSyncedResponse, }, diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 58567e464..36433acb9 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -12,6 +12,9 @@ use curp_external_api::cmd::Command; use curp_external_api::cmd::CommandExecutor; use curp_external_api::role_change::RoleChange; use curp_external_api::LogIndex; +use tokio_stream::wrappers::BroadcastStream; +use tokio_stream::StreamExt; +use tracing::debug; use utils::task_manager::tasks::TaskName; use super::CurpNode; @@ -28,6 +31,8 @@ use crate::rpc::MembershipChange; use crate::rpc::MembershipResponse; use crate::rpc::ProposeId; use crate::rpc::Redirect; +use crate::rpc::WaitLearnerRequest; +use crate::rpc::WaitLearnerResponse; use crate::server::raw_curp::node_state::NodeState; // Leader methods @@ -45,6 +50,38 @@ impl, RC: RoleChange> CurpNode { self.change_membership_inner(changes).await } + /// Handle `ProposeStream` requests + pub(crate) fn wait_learner( + &self, + req: WaitLearnerRequest, + tx: flume::Sender>, + ) { + let rxs = self.curp.register_monitoring(req.node_ids); + let _handle = tokio::spawn(async move { + let mut fused = futures::stream::select_all(rxs.into_iter().map(|(id, rx)| { + let stream = BroadcastStream::new(rx); + stream.map(move |res| res.map(|x| (id, x))) + })) + // ignores entry that has been removed + .filter_map(Result::ok); + + while let Some(res) = fused.next().await { + let (node_id, (current_idx, latest_idx)) = res; + if tx + .send(Ok(WaitLearnerResponse { + node_id, + current_idx, + latest_idx, + })) + .is_err() + { + debug!("wait learner stream unexpectedly closed"); + break; + } + } + }); + } + /// Performs a membership change to the cluster pub(crate) async fn change_membership_inner( &self, diff 
--git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 75d19ebe8..a46d34114 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -49,6 +49,8 @@ use crate::rpc::TryBecomeLeaderNowRequest; use crate::rpc::TryBecomeLeaderNowResponse; use crate::rpc::VoteRequest; use crate::rpc::VoteResponse; +use crate::rpc::WaitLearnerRequest; +use crate::rpc::WaitLearnerResponse; /// Command worker to do execution and after sync mod cmd_worker; @@ -202,6 +204,21 @@ impl, RC: RoleChange> crate::rpc::Protocol fo .map(tonic::Response::new) .map_err(Into::into) } + + type WaitLearnerStream = RecvStream<'static, Result>; + + #[instrument(skip_all, name = "wait_learner")] + async fn wait_learner( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + /// Max stream channel size + const CHANNEL_SIZE: usize = 1024; + let (tx, rx) = flume::bounded(CHANNEL_SIZE); + self.inner.wait_learner(request.into_inner(), tx); + + Ok(tonic::Response::new(rx.into_stream())) + } } #[tonic::async_trait] diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 43c68e31f..5abf5526e 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -31,6 +31,7 @@ use opentelemetry::KeyValue; use parking_lot::Mutex; use parking_lot::RwLock; use parking_lot::RwLockUpgradableReadGuard; +use tokio::sync::broadcast; use tokio::sync::oneshot; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; @@ -100,6 +101,9 @@ mod member_impl; /// Unified state for each node pub(crate) mod node_state; +/// Node monitor implementation +mod monitor; + /// The curp state machine pub struct RawCurp { /// Curp state @@ -310,6 +314,9 @@ enum Role { Learner, } +/// (current index, latest index) +type MonitorResult = (LogIndex, LogIndex); + /// Relevant context for Curp /// /// WARN: To avoid deadlock, the lock order should be: @@ -349,6 +356,8 @@ struct Context { id_barrier: Arc>, /// States 
of nodes in the cluster node_states: Arc, + /// Node collection to monitor state + monitoring: Arc>>>, } impl Context { @@ -412,6 +421,7 @@ impl ContextBuilder { Some(value) => value, None => return Err(ContextBuilderError::UninitializedField("node_states")), }, + monitoring: Arc::new(RwLock::default()), }) } } @@ -778,9 +788,7 @@ impl RawCurp { return Ok(true); }; - self.ctx - .node_states - .update_match_index(follower_id, last_sent_index); + self.update_match_index(follower_id, last_sent_index); // check if commit_index needs to be updated let log_r = self.log.upgradable_read(); @@ -1069,9 +1077,7 @@ impl RawCurp { if cur_role != Role::Leader { return Err(()); } - self.ctx - .node_states - .update_match_index(follower_id, meta.last_included_index.numeric_cast()); + self.update_match_index(follower_id, meta.last_included_index.numeric_cast()); Ok(()) } @@ -1767,4 +1773,22 @@ impl RawCurp { let _ignore_asr = cb.asr_buffer.swap_remove(&id); let _ignore_conf = cb.conf_buffer.swap_remove(&id); } + + /// Update match index, also updates the monitoring ids + fn update_match_index(&self, id: u64, index: LogIndex) { + self.ctx.node_states.update_match_index(id, index); + let latest = self.log.read().last_log_index(); + // removes the entry if the node is up-to-date. 
+ let to_remove = self.ctx.monitoring.map_read(|m| { + m.get(&id).map_or(false, |tx| { + if tx.send((index, latest)).is_err() { + error!("broadcast rx closed"); + } + index == latest + }) + }); + if to_remove { + let _ignore = self.ctx.monitoring.write().remove(&id); + } + } } diff --git a/crates/curp/src/server/raw_curp/monitor.rs b/crates/curp/src/server/raw_curp/monitor.rs new file mode 100644 index 000000000..94e4584e5 --- /dev/null +++ b/crates/curp/src/server/raw_curp/monitor.rs @@ -0,0 +1,30 @@ +use std::collections::BTreeMap; + +use curp_external_api::{cmd::Command, role_change::RoleChange, LogIndex}; +use tokio::sync::broadcast; + +use super::RawCurp; + +impl RawCurp { + /// Adds new nodes to monitor + pub(crate) fn register_monitoring>( + &self, + node_ids: Ids, + ) -> BTreeMap> { + /// Max number of receivers + const MAX_RECEIVERS: usize = 1024; + let mut monitoring_w = self.ctx.monitoring.write(); + node_ids + .into_iter() + .map(|id| { + ( + id, + monitoring_w + .entry(id) + .or_insert_with(|| broadcast::channel(MAX_RECEIVERS).0) + .subscribe(), + ) + }) + .collect() + } +} diff --git a/crates/xline/src/server/auth_wrapper.rs b/crates/xline/src/server/auth_wrapper.rs index ea0c07833..e826c0afa 100644 --- a/crates/xline/src/server/auth_wrapper.rs +++ b/crates/xline/src/server/auth_wrapper.rs @@ -7,6 +7,7 @@ use curp::{ FetchReadStateResponse, LeaseKeepAliveMsg, MembershipResponse, MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeRequest, Protocol, ReadIndexRequest, ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, + WaitLearnerRequest, WaitLearnerResponse, }, }; use flume::r#async::RecvStream; @@ -113,4 +114,13 @@ impl Protocol for AuthWrapper { ) -> Result, tonic::Status> { self.curp_server.change_membership(request).await } + + type WaitLearnerStream = RecvStream<'static, Result>; + + async fn wait_learner( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + 
self.curp_server.wait_learner(request).await + } } From 7f2c6b197fc33f98756d712377a19990cfb2879a Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:10:19 +0800 Subject: [PATCH 255/322] feat: implement wait learner on curp client Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/connect.rs | 21 ++++- crates/curp/src/client/keep_alive.rs | 12 +++ crates/curp/src/client/retry.rs | 16 +++- crates/curp/src/client/unary/mod.rs | 27 +++++- crates/curp/src/rpc/connect/lazy.rs | 16 +++- crates/curp/src/rpc/connect/mod.rs | 41 +++++++- crates/curp/src/rpc/reconnect.rs | 14 +++ crates/xline-client/src/clients/cluster.rs | 104 ++++++++++++++++++++- crates/xline-client/src/clients/mod.rs | 2 +- crates/xline-client/src/lib.rs | 2 +- 10 files changed, 242 insertions(+), 13 deletions(-) diff --git a/crates/curp/src/client/connect.rs b/crates/curp/src/client/connect.rs index a59301228..fc446d5bc 100644 --- a/crates/curp/src/client/connect.rs +++ b/crates/curp/src/client/connect.rs @@ -1,9 +1,12 @@ +use std::collections::BTreeSet; + use async_trait::async_trait; use curp_external_api::cmd::Command; +use futures::Stream; use crate::{ members::ServerId, - rpc::{Change, MembershipResponse, ReadState}, + rpc::{Change, MembershipResponse, ReadState, WaitLearnerResponse}, }; use super::retry::Context; @@ -58,6 +61,12 @@ pub trait ClientApi { /// Performs membership change async fn change_membership(&self, changes: Vec) -> Result<(), Self::Error>; + + /// Send wait learner of the give ids, returns a stream of updating response stream + async fn wait_learner( + &self, + node_ids: BTreeSet, + ) -> Result> + Send>, Self::Error>; } /// This trait override some unrepeatable methods in ClientApi, and a client with this trait will be able to retry. 
@@ -98,4 +107,14 @@ pub(crate) trait RepeatableClientApi { changes: Vec, ctx: Context, ) -> Result; + + /// Send wait learner of the give ids, returns a stream of updating response stream + async fn wait_learner( + &self, + node_ids: BTreeSet, + ctx: Context, + ) -> Result< + Box> + Send>, + Self::Error, + >; } diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 8b40c501d..835c2c982 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -159,6 +159,7 @@ mod tests { FetchReadStateRequest, FetchReadStateResponse, MembershipResponse, MoveLeaderRequest, MoveLeaderResponse, Node, NodeMetadata, OpResponse, ProposeRequest, QuorumSet, ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, + WaitLearnerRequest, WaitLearnerResponse, }, }; @@ -278,6 +279,17 @@ mod tests { ) -> Result, CurpError> { unreachable!("please use MockedConnectApi") } + + async fn wait_learner( + &self, + request: WaitLearnerRequest, + timeout: Duration, + ) -> Result< + tonic::Response> + Send>>, + CurpError, + > { + unreachable!("please use MockedConnectApi") + } } /// Create mocked stream connects diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index a4d08baec..4ad11e052 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -1,11 +1,11 @@ use std::{ ops::SubAssign, sync::{atomic::AtomicU64, Arc}, - time::Duration, + time::Duration, collections::BTreeSet, }; use async_trait::async_trait; -use futures::Future; +use futures::{Future, Stream}; use parking_lot::RwLock; use tracing::{debug, warn}; @@ -19,7 +19,7 @@ use super::{ }; use crate::{ members::ServerId, - rpc::{CurpError, ReadState, Redirect, ProposeId, MembershipResponse, NodeMetadata, Node, Change}, tracker::Tracker, + rpc::{CurpError, ReadState, Redirect, ProposeId, MembershipResponse, NodeMetadata, Node, Change, WaitLearnerResponse}, tracker::Tracker, }; /// Backoff 
config @@ -465,6 +465,16 @@ where Ok(()) } + + /// Send wait learner of the give ids, returns a stream of updating response stream + async fn wait_learner( + &self, + node_ids: BTreeSet, + ) -> Result> + Send>, Self::Error> { + self.retry::<_, _>(|client, ctx| client.wait_learner(node_ids.clone(), ctx)) + .await + } + } /// Tests for backoff diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 728b2dff8..282ba37e7 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -1,10 +1,11 @@ /// Client propose implementation mod propose_impl; -use std::marker::PhantomData; +use std::{collections::BTreeSet, marker::PhantomData}; use async_trait::async_trait; use curp_external_api::cmd::Command; +use futures::Stream; use tracing::warn; use super::{ @@ -14,7 +15,8 @@ use super::{ }; use crate::rpc::{ Change, ChangeMembershipRequest, CurpError, FetchReadStateRequest, MembershipChange, - MembershipResponse, MoveLeaderRequest, ReadState, ShutdownRequest, + MembershipResponse, MoveLeaderRequest, ReadState, ShutdownRequest, WaitLearnerRequest, + WaitLearnerResponse, }; /// The unary client @@ -128,4 +130,25 @@ impl RepeatableClientApi for Unary { Ok(resp) } + + /// Send wait learner of the give ids, returns a stream of updating response stream + async fn wait_learner( + &self, + node_ids: BTreeSet, + ctx: Context, + ) -> Result< + Box> + Send>, + Self::Error, + > { + let node_ids = node_ids.into_iter().collect(); + let req = WaitLearnerRequest { node_ids }; + let timeout = self.config.wait_synced_timeout(); + let resp = ctx + .cluster_state() + .map_leader(|conn| async move { conn.wait_learner(req, timeout).await }) + .await? 
+ .into_inner(); + + Ok(resp) + } } diff --git a/crates/curp/src/rpc/connect/lazy.rs b/crates/curp/src/rpc/connect/lazy.rs index 6e69a148c..5823ca9d3 100644 --- a/crates/curp/src/rpc/connect/lazy.rs +++ b/crates/curp/src/rpc/connect/lazy.rs @@ -18,7 +18,8 @@ use crate::{ FetchMembershipRequest, FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotResponse, MembershipResponse, MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, RecordResponse, - ShutdownRequest, ShutdownResponse, VoteRequest, VoteResponse, + ShutdownRequest, ShutdownResponse, VoteRequest, VoteResponse, WaitLearnerRequest, + WaitLearnerResponse, }, snapshot::Snapshot, }; @@ -254,4 +255,17 @@ impl ConnectApi for ConnectLazy> { .change_membership(request, timeout) .await } + + async fn wait_learner( + &self, + request: WaitLearnerRequest, + timeout: Duration, + ) -> Result< + tonic::Response> + Send>>, + CurpError, + > { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner.as_ref().unwrap().wait_learner(request, timeout).await + } } diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index 5a5d54312..a0463facc 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -54,7 +54,7 @@ use super::{ proto::commandpb::{ReadIndexRequest, ReadIndexResponse}, reconnect::Reconnect, ChangeMembershipRequest, FetchMembershipRequest, MembershipResponse, OpResponse, RecordRequest, - RecordResponse, + RecordResponse, WaitLearnerRequest, WaitLearnerResponse, }; /// Install snapshot chunk size: 64KB @@ -232,6 +232,16 @@ pub(crate) trait ConnectApi: Send + Sync + 'static { request: ChangeMembershipRequest, timeout: Duration, ) -> Result, CurpError>; + + /// Send `WaitLearnerRequest` + async fn wait_learner( + &self, + request: WaitLearnerRequest, + timeout: Duration, + ) -> Result< + tonic::Response> + Send>>, + CurpError, + >; } /// Inner Connect interface among 
different servers @@ -512,6 +522,20 @@ impl ConnectApi for Connect> { let req = tonic::Request::new(request); with_timeout!(timeout, client.change_membership(req)).map_err(Into::into) } + + async fn wait_learner( + &self, + request: WaitLearnerRequest, + timeout: Duration, + ) -> Result< + tonic::Response> + Send>>, + CurpError, + > { + let mut client = self.rpc_connect.clone(); + let req = tonic::Request::new(request); + let resp = with_timeout!(timeout, client.wait_learner(req))?.into_inner(); + Ok(tonic::Response::new(Box::new(resp))) + } } #[allow(clippy::let_and_return)] // for metrics @@ -789,6 +813,21 @@ where req.metadata_mut().inject_current(); self.server.change_membership(req).await.map_err(Into::into) } + + async fn wait_learner( + &self, + request: WaitLearnerRequest, + _timeout: Duration, + ) -> Result< + tonic::Response> + Send>>, + CurpError, + > { + let mut req = tonic::Request::new(request); + req.metadata_mut().inject_bypassed(); + req.metadata_mut().inject_current(); + let resp = self.server.wait_learner(req).await?.into_inner(); + Ok(tonic::Response::new(Box::new(resp))) + } } /// Generate heartbeat stream diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs index 01039b8f3..57dfb1634 100644 --- a/crates/curp/src/rpc/reconnect.rs +++ b/crates/curp/src/rpc/reconnect.rs @@ -14,6 +14,8 @@ use crate::{ }, }; +use super::{WaitLearnerRequest, WaitLearnerResponse}; + /// Auto reconnect of a connection pub(super) struct Reconnect { /// Connect id @@ -171,4 +173,16 @@ impl ConnectApi for Reconnect { ) -> Result, CurpError> { execute_with_reconnect!(self, ConnectApi::change_membership, request, timeout) } + + /// Send `WaitLearnerRequest` + async fn wait_learner( + &self, + request: WaitLearnerRequest, + timeout: Duration, + ) -> Result< + tonic::Response> + Send>>, + CurpError, + > { + execute_with_reconnect!(self, ConnectApi::wait_learner, request, timeout) + } } diff --git a/crates/xline-client/src/clients/cluster.rs 
b/crates/xline-client/src/clients/cluster.rs index 545d28510..b788b2ef9 100644 --- a/crates/xline-client/src/clients/cluster.rs +++ b/crates/xline-client/src/clients/cluster.rs @@ -1,35 +1,49 @@ use std::sync::Arc; +use curp::rpc::WaitLearnerResponse; +use futures::{Stream, StreamExt}; use tonic::transport::Channel; -use crate::{error::Result, AuthService}; +use crate::{error::Result, AuthService, CurpClient}; use xlineapi::{ MemberAddResponse, MemberListResponse, MemberPromoteResponse, MemberRemoveResponse, MemberUpdateResponse, }; /// Client for Cluster operations. -#[derive(Clone, Debug)] +#[derive(Clone)] #[non_exhaustive] pub struct ClusterClient { /// Inner client #[cfg(not(madsim))] inner: xlineapi::ClusterClient>, + /// The client running the CURP protocol, communicate with all servers. + curp_client: Arc, /// Inner client #[cfg(madsim)] inner: xlineapi::ClusterClient, } +impl std::fmt::Debug for ClusterClient { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ClusterClient") + .field("inner", &self.inner) + .finish() + } +} + impl ClusterClient { /// Create a new cluster client #[inline] #[must_use] - pub fn new(channel: Channel, token: Option) -> Self { + pub fn new(curp_client: Arc, channel: Channel, token: Option) -> Self { Self { inner: xlineapi::ClusterClient::new(AuthService::new( channel, token.and_then(|t| t.parse().ok().map(Arc::new)), )), + curp_client, } } @@ -224,4 +238,88 @@ impl ClusterClient { .await? .into_inner()) } + + /// Wait for learners to be added to the cluster. + /// + /// # Arguments + /// + /// * `node_ids` - An iterator of node IDs to wait for. + /// + /// # Errors + /// + /// Returns an error if the request could not be sent or if the response is invalid. 
+ /// + /// # Examples + /// + /// ```no_run + /// use xline_client::{Client, ClientOptions, clients::LearnerStatus}; + /// use anyhow::Result; + /// use futures::StreamExt; + /// + /// #[tokio::main] + /// async fn main() -> Result<()> { + /// let curp_members = ["10.0.0.1:2379", "10.0.0.2:2379", "10.0.0.3:2379"]; + /// + /// let mut client = Client::connect(curp_members, ClientOptions::default()) + /// .await? + /// .cluster_client(); + /// let mut stream = client.wait_learner(vec![1, 2, 3]).await?; + /// + /// while let Some(Ok(status)) = stream.next().await { + /// match status { + /// LearnerStatus::Pending { node_id, index } => { + /// println!("Learner node {} is pending with index {}", node_id, index); + /// } + /// LearnerStatus::Ready => { + /// println!("Learner node is ready"); + /// } + /// } + /// } + /// + /// // all learners are up-to-date + /// + /// Ok(()) + /// } + /// ``` + #[inline] + pub async fn wait_learner>( + &mut self, + node_ids: Ids, + ) -> Result> + Send + Unpin>> { + let stream = self + .curp_client + .wait_learner(node_ids.into_iter().collect()) + .await?; + let stream_mapped = Box::into_pin(stream).map(|r| r.map(Into::into).map_err(Into::into)); + + Ok(Box::new(stream_mapped)) + } +} + +#[allow(clippy::exhaustive_enums)] // only two states +#[derive(Debug, Clone, Copy)] +/// Represents the state of a learner +pub enum LearnerStatus { + /// The learner node is pending and not yet ready. + Pending { + /// The id of the node + node_id: u64, + /// The current replicated log index of the node + index: u64, + }, + /// The learner node is up-to-date. 
+ Ready, +} + +impl From for LearnerStatus { + #[inline] + fn from(resp: WaitLearnerResponse) -> Self { + if resp.current_idx == resp.latest_idx { + return LearnerStatus::Ready; + } + LearnerStatus::Pending { + node_id: resp.node_id, + index: resp.current_idx, + } + } } diff --git a/crates/xline-client/src/clients/mod.rs b/crates/xline-client/src/clients/mod.rs index 9c7c24828..09ecd0770 100644 --- a/crates/xline-client/src/clients/mod.rs +++ b/crates/xline-client/src/clients/mod.rs @@ -1,5 +1,5 @@ pub use auth::AuthClient; -pub use cluster::ClusterClient; +pub use cluster::{ClusterClient, LearnerStatus}; pub use election::ElectionClient; pub use kv::KvClient; pub use lease::LeaseClient; diff --git a/crates/xline-client/src/lib.rs b/crates/xline-client/src/lib.rs index c34051134..c72a3804a 100644 --- a/crates/xline-client/src/lib.rs +++ b/crates/xline-client/src/lib.rs @@ -277,7 +277,7 @@ impl Client { ); let auth = AuthClient::new(Arc::clone(&curp_client), channel.clone(), token.clone()); let maintenance = MaintenanceClient::new(channel.clone(), token.clone()); - let cluster = ClusterClient::new(channel.clone(), token.clone()); + let cluster = ClusterClient::new(Arc::clone(&curp_client), channel.clone(), token.clone()); let watch = WatchClient::new(channel, token); let election = ElectionClient::new(); let member = MemberClient::new(curp_client); From 130f14a4a1a172281ea34621859bcb997e9e4d46 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 16 Oct 2024 20:05:16 +0800 Subject: [PATCH 256/322] refactor: reimplement curp follower replication Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/member_impl.rs | 3 +- crates/curp/src/server/curp_node/mod.rs | 6 + .../curp/src/server/curp_node/replication.rs | 344 ++++++++++++++++++ crates/curp/src/server/raw_curp/mod.rs | 128 ++++++- crates/curp/src/server/raw_curp/node_state.rs | 10 +- 5 files changed, 485 insertions(+), 6 deletions(-) 
create mode 100644 crates/curp/src/server/curp_node/replication.rs diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 36433acb9..ebb37ef4a 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -246,7 +246,7 @@ mod test { TestRoleChange, }; use engine::MemorySnapshotAllocator; - use parking_lot::RwLock; + use parking_lot::{Mutex, RwLock}; use tokio::sync::mpsc; use tracing_test::traced_test; use utils::{config::EngineConfig, task_manager::TaskManager}; @@ -282,6 +282,7 @@ mod test { cmd_executor: Arc::new(ce), as_tx, propose_tx, + replication_handles: Mutex::default(), } } diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index c564ec00c..be69733e3 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -63,6 +63,9 @@ use crate::{ /// `CurpNode` member implementation mod member_impl; +/// Log replication implementation +mod replication; + /// After sync entry, composed of a log entry and response sender pub(crate) type AfterSyncEntry = (Arc>, Option>); @@ -133,6 +136,8 @@ pub(super) struct CurpNode, RC: RoleChange> { as_tx: flume::Sender>, /// Tx to send to propose task propose_tx: flume::Sender>, + /// All handles of the replication tasks + replication_handles: Mutex, } /// Handlers for clients @@ -826,6 +831,7 @@ impl, RC: RoleChange> CurpNode { cmd_executor, as_tx, propose_tx, + replication_handles: Mutex::default(), }) } diff --git a/crates/curp/src/server/curp_node/replication.rs b/crates/curp/src/server/curp_node/replication.rs new file mode 100644 index 000000000..ea02dce0b --- /dev/null +++ b/crates/curp/src/server/curp_node/replication.rs @@ -0,0 +1,344 @@ +#![allow(unused)] +use std::{sync::Arc, time::Duration}; + +use curp_external_api::{ + cmd::{Command, CommandExecutor}, + role_change::RoleChange, + LogIndex, +}; +use 
futures::{future::join_all, Future, FutureExt}; +use tokio::{sync::oneshot, task::JoinHandle, time::MissedTickBehavior}; +use tonic::Response; +use tracing::{debug, error, info, warn}; +use utils::{config::CurpConfig, parking_lot_lock::MutexMap}; + +use crate::{ + rpc::{ + connect::InnerConnectApiWrapper, AppendEntriesResponse, CurpError, InstallSnapshotResponse, + }, + server::{ + metrics, + raw_curp::{node_state::NodeState, AppendEntries, Heartbeat, SyncAction}, + RawCurp, + }, + snapshot::Snapshot, +}; + +use super::CurpNode; + +/// All handles of the replication tasks +#[derive(Default)] +pub(super) struct Handles { + /// Handles + inner: Vec>, +} + +impl Handles { + /// Abort all replication tasks + fn abort_all(&mut self) -> impl Future { + for handle in &self.inner { + handle.abort(); + } + join_all(self.inner.drain(..)).map(|results| { + debug!("aborted replication tasks, results: {results:?}"); + }) + } + + /// Replace with new handles + fn replace_with(&mut self, handles: impl IntoIterator>) { + self.inner.extend(handles); + } +} + +/// Represents various actions that can be performed on the `RawCurp` state machine +enum Action { + /// Update the match index for a given node. + /// Contains (node_id, match_index) + UpdateMatchIndex((u64, LogIndex)), + + /// Update the next index for a given node. + /// Contains (node_id, next_index) + UpdateNextIndex((u64, LogIndex)), + + /// Request to get the log starting from a specific index. + /// Contains a tuple with the starting log index and a sender to send the sync action. + GetLogFrom((LogIndex, oneshot::Sender>)), + + /// Step down the current node. + /// Contains the latest term. 
+ StepDown(u64), +} + +impl, RC: RoleChange> CurpNode { + #[allow(clippy::arithmetic_side_effects)] // a log index(u64) should never overflow + /// Respawn replication tasks base on current node states + /// + /// The following assumption holds: + /// * This method can only be called by the leader + /// This method must be called under the following conditions: + /// * When a new leader is elected + /// * When membership changes + pub(super) async fn respawn_replication(&self) { + let self_id = self.curp.id(); + let cfg = self.curp.cfg().clone(); + let self_term = self.curp.term(); + let mut node_states = self.curp.all_node_states(); + // we don't needs to sync to self + let _ignore = node_states.remove(&self_id); + let connects = node_states + .values() + .map(NodeState::connect) + .cloned() + .collect(); + let self_next_index = self.curp.last_log_index() + 1; + // TODO: use bounded + let (action_tx, action_rx) = flume::unbounded(); + let curp = Arc::clone(&self.curp); + self.replication_handles + .map_lock(|mut h| h.abort_all()) + .await; + + let state_handle = tokio::spawn(Self::state_machine_worker(curp, action_rx)); + let heartbeat_handle = tokio::spawn(Self::heartbeat_worker( + connects, + cfg.clone(), + self_id, + self_term, + )); + let replication_handles = node_states.into_iter().map(|(id, state)| { + let cfg = cfg.clone(); + info!("spawning replication task for {id}"); + tokio::spawn(Self::replication_worker( + state, + action_tx.clone(), + self_id, + self_term, + cfg, + self_next_index, + )) + }); + + self.replication_handles.lock().replace_with( + replication_handles + .chain([state_handle]) + .chain([heartbeat_handle]), + ); + } + + /// A worker responsible for synchronizing data with the curp state machine + async fn state_machine_worker( + curp: Arc>, + action_rx: flume::Receiver>, + ) { + // As we spawn the workers on every leader update, the term remains consistent + let self_term = curp.term(); + while let Ok(update) = action_rx.recv_async().await 
{ + match update { + Action::UpdateMatchIndex((node_id, index)) => { + debug!("updating {node_id}'s match index to {index}"); + curp.update_match_index(node_id, index); + curp.try_update_commit_index(index, self_term); + } + Action::UpdateNextIndex((node_id, index)) => { + debug!("updating {node_id}'s next index to {index}"); + curp.update_next_index(node_id, index); + } + Action::GetLogFrom((next, tx)) => { + debug!("getting log from index {next}"); + let sync = curp.sync_from(next); + if tx.send(sync).is_err() { + error!("send append entries failed"); + } + } + Action::StepDown(node_term) => { + debug_assert!(node_term > self_term, "node_term no greater than self_term"); + info!("received greater term: {node_term}, stepping down."); + curp.step_down(node_term); + break; + } + } + } + // tx dropped, exit + debug!("state update task exit"); + } + + /// A worker responsible for sending heartbeat to the cluster + async fn heartbeat_worker( + connects: Vec, + cfg: CurpConfig, + self_id: u64, + self_term: u64, + ) { + let timeout = cfg.rpc_timeout; + let mut ticker = tokio::time::interval(cfg.heartbeat_interval); + ticker.set_missed_tick_behavior(MissedTickBehavior::Delay); + let heartbeat = Heartbeat::new(self_term, self_id); + loop { + let _inst = ticker.tick().await; + for connect in &connects { + if let Err(err) = connect.append_entries(heartbeat.into(), timeout).await { + warn!("heartbeat to {} failed, {err:?}", connect.id()); + metrics::get().heartbeat_send_failures.add(1, &[]); + } + } + } + } + + /// A worker responsible for appending log entries to other nodes in the cluster + async fn replication_worker( + node_state: NodeState, + action_tx: flume::Sender>, + self_id: u64, + self_term: u64, + cfg: CurpConfig, + self_next_index: LogIndex, + ) { + let rpc_timeout = cfg.rpc_timeout; + let batch_timeout = cfg.batch_timeout; + let connect = node_state.connect(); + let sync_event = node_state.sync_event(); + let mut next_index = node_state.next_index(); + // The 
next_index could be zero if a new leader is elected and it does not have the + // infomations of other nodes. We set the initial index to the next index of the + // current node. + if next_index == 0 { + next_index = self_next_index; + } + + loop { + let _ignore = tokio::time::timeout(batch_timeout, sync_event.listen()).await; + let (tx, rx) = oneshot::channel(); + if let Err(err) = action_tx.send(Action::GetLogFrom((next_index, tx))) { + error!("action_rx unexpectedly closed: {err}"); + } + + let action = match rx.await { + Ok(SyncAction::AppendEntries(ae)) => { + Self::handle_append_entries(&ae, connect, rpc_timeout, self_id, self_term).await + } + Ok(SyncAction::Snapshot(rx)) => { + Self::handle_snapshot(rx, connect, self_id, self_term).await + } + Err(err) => { + error!("channel unexpectedly closed: {err}"); + return; + } + }; + + if let Some(action) = action { + if let Action::UpdateNextIndex((_, index)) = action { + next_index = index; + } + if let Err(err) = action_tx.send(action) { + error!("action_rx was accidentally dropped: {err}"); + } + } + } + } + + /// Handle append entries + async fn handle_append_entries( + ae: &AppendEntries, + connect: &InnerConnectApiWrapper, + rpc_timeout: Duration, + self_id: u64, + self_term: u64, + ) -> Option> { + // no new entries to append + if ae.entries.is_empty() { + return None; + } + Self::send_append_entries(connect, ae, rpc_timeout, self_id) + .await + .map(|resp| Self::append_entries_action(resp, ae, connect.id(), self_term)) + .map_err(|err| warn!("ae to {} failed, {err:?}", connect.id())) + .ok() + } + + /// Send `append_entries` request + async fn send_append_entries( + connect: &InnerConnectApiWrapper, + ae: &AppendEntries, + timeout: Duration, + self_id: u64, + ) -> Result { + debug!("{self_id} send append_entries to {}", connect.id()); + + connect + .append_entries(ae.into(), timeout) + .await + .map(Response::into_inner) + .map_err(Into::into) + } + + #[allow(clippy::as_conversions, 
clippy::arithmetic_side_effects)] // converting usize to u64 is safe + /// Generate `Action` from append entries response + fn append_entries_action( + resp: AppendEntriesResponse, + ae: &AppendEntries, + node_id: u64, + self_term: u64, + ) -> Action { + let other_term = resp.term; + let success = resp.success; + let hint_index = resp.hint_index; + + if self_term < other_term { + return Action::StepDown(other_term); + } + + if !success { + return Action::UpdateNextIndex((node_id, hint_index)); + } + + let last_sent_index = ae.prev_log_index + ae.entries.len() as u64; + Action::UpdateMatchIndex((node_id, last_sent_index)) + } + + /// Handle snapshot + async fn handle_snapshot( + rx: oneshot::Receiver, + connect: &InnerConnectApiWrapper, + self_id: u64, + self_term: u64, + ) -> Option> { + let snapshot = rx + .await + .map_err(|err| warn!("failed to receive snapshot result, {err}")) + .ok()?; + let last_include_index = snapshot.meta.last_included_index; + Self::send_snapshot1(connect, snapshot, self_id, self_term) + .await + .map(|resp| Self::snapshot_action(resp, connect.id(), self_term, last_include_index)) + .map_err(|err| warn!("snapshot to {} failed, {err:?}", connect.id())) + .ok() + } + + /// Send snapshot + async fn send_snapshot1( + connect: &InnerConnectApiWrapper, + snapshot: Snapshot, + self_id: u64, + self_term: u64, + ) -> Result { + connect + .install_snapshot(self_term, self_id, snapshot) + .await + .map(Response::into_inner) + .map_err(Into::into) + } + + /// Generate `Action` from snapshot response + fn snapshot_action( + resp: InstallSnapshotResponse, + node_id: u64, + self_term: u64, + last_include_index: LogIndex, + ) -> Action { + let other_term = resp.term; + if self_term < other_term { + return Action::StepDown(other_term); + } + Action::UpdateMatchIndex((node_id, last_include_index)) + } +} diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 5abf5526e..061fd71e7 100644 --- 
a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -50,6 +50,7 @@ use utils::task_manager::TaskManager; use utils::ClientTlsConfig; use self::log::Log; +use self::node_state::NodeState; use self::node_state::NodeStates; use self::state::CandidateState; use self::state::LeaderState; @@ -280,6 +281,36 @@ pub(super) struct Vote { pub(super) is_pre_vote: bool, } +/// A heartbeat +#[derive(Debug, Clone, Copy)] +pub(super) struct Heartbeat { + /// Leader's term + term: u64, + /// Leader's id + leader_id: ServerId, +} + +impl Heartbeat { + /// Creates a new `Heartbeat` + pub(super) fn new(term: u64, leader_id: ServerId) -> Self { + Self { term, leader_id } + } +} + +impl From for crate::rpc::AppendEntriesRequest { + fn from(hb: Heartbeat) -> Self { + Self { + term: hb.term, + leader_id: hb.leader_id, + // not used for a heartbeat + prev_log_index: 0, + prev_log_term: 0, + leader_commit: 0, + entries: vec![], + } + } +} + /// Invoked by leader to replicate log entries; also used as heartbeat pub(super) struct AppendEntries { /// Leader's term @@ -296,6 +327,26 @@ pub(super) struct AppendEntries { pub(super) entries: Vec>>, } +impl From<&AppendEntries> for crate::rpc::AppendEntriesRequest { + fn from(ae: &AppendEntries) -> Self { + let entries_serialized = ae + .entries + .iter() + .map(bincode::serialize) + .collect::>>>() + .unwrap_or_else(|e| unreachable!("bincode serialization should never fail, err: {e}")); + + Self { + term: ae.term, + leader_id: ae.leader_id, + prev_log_index: ae.prev_log_index, + prev_log_term: ae.prev_log_term, + leader_commit: ae.leader_commit, + entries: entries_serialized, + } + } +} + /// Curp Role #[derive(Debug, Clone, Copy, PartialEq)] enum Role { @@ -805,6 +856,20 @@ impl RawCurp { Ok(true) } + /// Check if `commit_index` needs to be updated + pub(super) fn try_update_commit_index(&self, index: LogIndex, term: u64) { + let log_r = self.log.upgradable_read(); + if self.can_update_commit_index_to(&log_r, 
index, term) { + let mut log_w = RwLockUpgradableReadGuard::upgrade(log_r); + if index > log_w.commit_index { + log_w.commit_to(index); + self.update_membership_state(None, None, Some(index)); + debug!("{} updates commit index to {index}", self.id()); + self.apply(&mut *log_w); + } + } + } + /// Handle `vote` /// Return `Ok(term, spec_pool)` if the vote is granted /// Return `Err(Some(term))` if the vote is rejected @@ -1279,6 +1344,49 @@ impl RawCurp { } } + /// Get `append_entries` request for `follower_id` that contains the latest + /// log entries + pub(super) fn sync_from(&self, next_index: LogIndex) -> SyncAction { + let term = self.st.read().term; + let log_r = self.log.read(); + + if next_index <= log_r.base_index { + // the log has already been compacted + let entry = log_r.get(log_r.last_exe).unwrap_or_else(|| { + unreachable!( + "log entry {} should not have been compacted yet, needed for snapshot", + log_r.last_as + ) + }); + // TODO: buffer a local snapshot: if a follower is down for a long time, + // the leader will take a snapshot itself every time `sync` is called in effort + // to calibrate it. Since taking a snapshot will block the leader's + // execute workers, we should not take snapshot so often. A better + // solution would be to keep a snapshot cache. 
+ let meta = SnapshotMeta { + last_included_index: entry.index, + last_included_term: entry.term, + }; + let (tx, rx) = oneshot::channel(); + if let Err(e) = self.ctx.as_tx.send(TaskType::Snapshot(meta, tx)) { + error!("failed to send task to after sync: {e}"); + } + SyncAction::Snapshot(rx) + } else { + let (prev_log_index, prev_log_term) = log_r.get_prev_entry_info(next_index); + let entries = log_r.get_from(next_index); + let ae = AppendEntries { + term, + leader_id: self.id(), + prev_log_index, + prev_log_term, + leader_commit: log_r.commit_index, + entries, + }; + SyncAction::AppendEntries(ae) + } + } + /// Get a reference to `CurpConfig` pub(super) fn cfg(&self) -> &CurpConfig { self.ctx.cfg.as_ref() @@ -1416,8 +1524,24 @@ impl RawCurp { self.ctx.client_tls_config.as_ref() } - /// Get a range of log entry + /// Leader step down + pub(crate) fn step_down(&self, term: u64) { + let mut st = self.st.write(); + self.update_to_term_and_become_follower(&mut st, term); + } + + /// Updates the next index of the give node + pub(crate) fn update_next_index(&self, node_id: u64, index: LogIndex) { + self.ctx.node_states.update_next_index(node_id, index); + } + + /// Get all node states + pub(super) fn all_node_states(&self) -> BTreeMap { + self.ctx.node_states.all_states() + } + #[cfg(test)] + /// Get a range of log entry pub(crate) fn get_log_from(&self, idx: u64) -> Vec>> { self.log.read().get_from(idx) } @@ -1775,7 +1899,7 @@ impl RawCurp { } /// Update match index, also updates the monitoring ids - fn update_match_index(&self, id: u64, index: LogIndex) { + pub(crate) fn update_match_index(&self, id: u64, index: LogIndex) { self.ctx.node_states.update_match_index(id, index); let latest = self.log.read().last_log_index(); // removes the entry if the node is up-to-date. 
diff --git a/crates/curp/src/server/raw_curp/node_state.rs b/crates/curp/src/server/raw_curp/node_state.rs index 329a7c2dd..f769c021b 100644 --- a/crates/curp/src/server/raw_curp/node_state.rs +++ b/crates/curp/src/server/raw_curp/node_state.rs @@ -164,7 +164,6 @@ impl NodeStates { } /// Get all node states - #[cfg(test)] pub(super) fn all_states(&self) -> BTreeMap { self.states.read().clone() } @@ -199,8 +198,13 @@ impl NodeState { &self.status } + /// Get the next index of the current node + pub(crate) fn next_index(&self) -> LogIndex { + self.status.next_index + } + /// Get the connection to the node - pub(super) fn connect(&self) -> &InnerConnectApiWrapper { + pub(crate) fn connect(&self) -> &InnerConnectApiWrapper { &self.connect } @@ -210,7 +214,7 @@ impl NodeState { } /// Get the sync event trigger for a follower - pub(super) fn sync_event(&self) -> &Event { + pub(crate) fn sync_event(&self) -> &Arc { &self.sync_event } From e1287bea24680f826a1e38885111e2833fafdd93 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 17 Oct 2024 09:47:20 +0800 Subject: [PATCH 257/322] refactor: remove old replication code Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/rpc/mod.rs | 22 -- .../curp/src/server/curp_node/member_impl.rs | 31 +- crates/curp/src/server/curp_node/mod.rs | 276 +----------------- crates/curp/src/server/raw_curp/mod.rs | 164 ----------- crates/curp/src/server/raw_curp/node_state.rs | 25 +- crates/curp/src/server/raw_curp/tests.rs | 4 + 6 files changed, 25 insertions(+), 497 deletions(-) diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 39209481c..ef621ea57 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -273,28 +273,6 @@ impl SyncedResponse { } impl AppendEntriesRequest { - /// Create a new `append_entries` request - pub(crate) fn new( - term: u64, - leader_id: ServerId, - prev_log_index: LogIndex, - prev_log_term: u64, - 
entries: Vec>>, - leader_commit: LogIndex, - ) -> bincode::Result { - Ok(Self { - term, - leader_id, - prev_log_index, - prev_log_term, - entries: entries - .into_iter() - .map(|e| bincode::serialize(&e)) - .collect::>>>()?, - leader_commit, - }) - } - /// Get log entries pub(crate) fn entries(&self) -> bincode::Result>> { self.entries diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index ebb37ef4a..88c9f482f 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -6,7 +6,6 @@ use std::collections::BTreeMap; use std::collections::HashSet; -use std::sync::Arc; use curp_external_api::cmd::Command; use curp_external_api::cmd::CommandExecutor; @@ -15,7 +14,6 @@ use curp_external_api::LogIndex; use tokio_stream::wrappers::BroadcastStream; use tokio_stream::StreamExt; use tracing::debug; -use utils::task_manager::tasks::TaskName; use super::CurpNode; use crate::log_entry::EntryData; @@ -33,7 +31,6 @@ use crate::rpc::ProposeId; use crate::rpc::Redirect; use crate::rpc::WaitLearnerRequest; use crate::rpc::WaitLearnerResponse; -use crate::server::raw_curp::node_state::NodeState; // Leader methods impl, RC: RoleChange> CurpNode { @@ -179,8 +176,7 @@ impl, RC: RoleChange> CurpNode { /// Updates the membership config pub(crate) fn update_states_with_membership(&self, membership: &Membership) { let connects = self.connect_other_nodes(membership); - let new_states = self.curp.update_node_states(connects); - self.spawn_sync_follower_tasks(new_states.into_values()); + let _new_states = self.curp.update_node_states(connects); self.curp.update_role(); } @@ -217,28 +213,12 @@ impl, RC: RoleChange> CurpNode { inner_connects(nodes, self.curp.client_tls_config()).collect() } - - /// Spawns background follower sync tasks - pub(super) fn spawn_sync_follower_tasks(&self, new_nodes: impl IntoIterator) { - let task_manager = self.curp.task_manager(); - for (connect, 
sync_event, remove_event) in new_nodes.into_iter().map(NodeState::into_parts) - { - task_manager.spawn(TaskName::SyncFollower, |n| { - Self::sync_follower_task( - Arc::clone(&self.curp), - connect, - sync_event, - remove_event, - n, - ) - }); - } - } } +#[cfg(ignore)] // TODO: rewrite this test #[cfg(test)] mod test { - use std::time::Duration; + use std::{sync::Arc, time::Duration}; use curp_test_utils::{ mock_role_change, @@ -249,7 +229,10 @@ mod test { use parking_lot::{Mutex, RwLock}; use tokio::sync::mpsc; use tracing_test::traced_test; - use utils::{config::EngineConfig, task_manager::TaskManager}; + use utils::{ + config::EngineConfig, + task_manager::{tasks::TaskName, TaskManager}, + }; use crate::{ rpc::NodeMetadata, diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index be69733e3..ee3f05549 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -5,9 +5,8 @@ use std::{ time::{Duration, Instant}, }; -use clippy_utilities::{NumericCast, OverflowArithmetic}; +use clippy_utilities::NumericCast; use engine::{SnapshotAllocator, SnapshotApi}; -use event_listener::Event; use futures::{pin_mut, stream::FuturesUnordered, FutureExt, Stream, StreamExt}; use madsim::rand::{thread_rng, Rng}; use opentelemetry::KeyValue; @@ -15,14 +14,18 @@ use parking_lot::{Mutex, RwLock}; use tokio::{sync::oneshot, time::MissedTickBehavior}; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, error, info, warn}; #[cfg(madsim)] -use utils::ClientTlsConfig; use utils::{ barrier::IdBarrier, config::CurpConfig, task_manager::{tasks::TaskName, Listener, State, TaskManager}, }; +use utils::{ + barrier::IdBarrier, + config::CurpConfig, + task_manager::{tasks::TaskName, Listener, TaskManager}, +}; use super::{ cmd_board::{CmdBoardRef, CommandBoard}, @@ -31,19 +34,17 @@ use super::{ conflict::uncommitted_pool::{UcpObject, 
UncommittedPool}, gc::gc_client_lease, lease_manager::LeaseManager, - raw_curp::{AppendEntries, RawCurp, Vote}, + raw_curp::{RawCurp, Vote}, storage::StorageApi, }; use crate::{ cmd::{Command, CommandExecutor}, - log_entry::{EntryData, LogEntry}, + log_entry::LogEntry, member::{MembershipConfig, MembershipInfo}, response::ResponseSender, role_change::RoleChange, rpc::{ - self, - connect::{InnerConnectApi, InnerConnectApiWrapper}, - AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchMembershipRequest, + self, AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchMembershipRequest, FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, MembershipResponse, MoveLeaderRequest, MoveLeaderResponse, PoolEntry, ProposeId, ProposeRequest, ProposeResponse, @@ -54,7 +55,6 @@ use crate::{ server::{ cmd_worker::{after_sync, worker_reset, worker_snapshot}, metrics, - raw_curp::SyncAction, storage::db::DB, }, snapshot::{Snapshot, SnapshotMeta}, @@ -643,73 +643,6 @@ impl, RC: RoleChange> CurpNode { } } - /// This task will keep a follower up-to-data when current node is leader, - /// and it will wait for `leader_event` if current node is not leader - #[allow(clippy::arithmetic_side_effects, clippy::ignored_unit_patterns)] // tokio select internal triggered - pub(crate) async fn sync_follower_task( - curp: Arc>, - connect: InnerConnectApiWrapper, - sync_event: Arc, - remove_event: Arc, - shutdown_listener: Listener, - ) { - debug!("{} to {} sync follower task start", curp.id(), connect.id()); - let _guard = shutdown_listener.sync_follower_guard(); - let mut ticker = tokio::time::interval(curp.cfg().heartbeat_interval); - ticker.set_missed_tick_behavior(MissedTickBehavior::Delay); - let connect_id = connect.id(); - let batch_timeout = curp.cfg().batch_timeout; - let leader_event = curp.leader_event(); - - if !curp.is_leader() { - tokio::select! 
{ - _ = shutdown_listener.wait_state() => return, - _ = remove_event.listen() => return, - _ = leader_event.listen() => {} - } - } - let mut hb_opt = false; - let mut is_shutdown_state = false; - let mut ae_fail_count = 0; - loop { - // a sync is either triggered by an heartbeat timeout event or when new log - // entries arrive - tokio::select! { - state = shutdown_listener.wait_state(), if !is_shutdown_state => { - match state { - State::Running => unreachable!("wait state should not return Run"), - State::Shutdown => return, - State::ClusterShutdown => is_shutdown_state = true, - } - }, - _ = remove_event.listen() => return, - _now = ticker.tick() => hb_opt = false, - res = tokio::time::timeout(batch_timeout, sync_event.listen()) => { - if let Err(_e) = res { - hb_opt = true; - } - } - } - - let Some(sync_action) = curp.sync(connect_id) else { - break; - }; - if Self::handle_sync_action( - sync_action, - &mut hb_opt, - is_shutdown_state, - &mut ae_fail_count, - connect.as_ref(), - curp.as_ref(), - ) - .await - { - break; - }; - } - debug!("{} to {} sync follower task exits", curp.id(), connect.id()); - } - /// After sync task async fn after_sync_task( curp: Arc>, @@ -848,25 +781,6 @@ impl, RC: RoleChange> CurpNode { Self::election_task(Arc::clone(&curp), n) }); - let self_id = curp.id(); - curp.with_member_connects(|connects| { - for (id, c) in connects { - if *id == self_id { - continue; - } - let (sync_event, remove_event) = curp.events(c.id()); - task_manager.spawn(TaskName::SyncFollower, |n| { - Self::sync_follower_task( - Arc::clone(&curp), - c.clone(), - sync_event, - Arc::clone(&remove_event), - n, - ) - }); - } - }); - task_manager.spawn(TaskName::HandlePropose, |_n| { Self::handle_propose_task(Arc::clone(&cmd_executor), Arc::clone(&curp), propose_rx) }); @@ -946,173 +860,10 @@ impl, RC: RoleChange> CurpNode { None } - /// Send `append_entries` request - /// Return `tonic::Error` if meet network issue - /// Return (`leader_retires`, `ae_succeed`) - 
#[allow(clippy::arithmetic_side_effects)] // won't overflow - async fn send_ae( - connect: &(impl InnerConnectApi + ?Sized), - curp: &RawCurp, - ae: AppendEntries, - ) -> Result<(bool, bool), CurpError> { - let last_sent_index = (!ae.entries.is_empty()) - .then(|| ae.prev_log_index + ae.entries.len().numeric_cast::()); - let is_heartbeat = ae.entries.is_empty(); - let req = AppendEntriesRequest::new( - ae.term, - ae.leader_id, - ae.prev_log_index, - ae.prev_log_term, - ae.entries, - ae.leader_commit, - )?; - - if is_heartbeat { - trace!("{} send heartbeat to {}", curp.id(), connect.id()); - } else { - debug!("{} send append_entries to {}", curp.id(), connect.id()); - } - - let resp = connect - .append_entries(req, curp.cfg().rpc_timeout) - .await? - .into_inner(); - - let Ok(ae_succeed) = curp.handle_append_entries_resp( - connect.id(), - last_sent_index, - resp.term, - resp.success, - resp.hint_index, - ) else { - return Ok((true, false)); - }; - - Ok((false, ae_succeed)) - } - - /// Send snapshot - /// Return `tonic::Error` if meet network issue - /// Return `leader_retires` - async fn send_snapshot( - connect: &(impl InnerConnectApi + ?Sized), - curp: &RawCurp, - snapshot: Snapshot, - ) -> Result { - let meta = snapshot.meta; - let resp = connect - .install_snapshot(curp.term(), curp.id(), snapshot) - .await? 
- .into_inner(); - Ok(curp - .handle_snapshot_resp(connect.id(), meta, resp.term) - .is_err()) - } - /// Get `RawCurp` pub(super) fn raw_curp(&self) -> Arc> { Arc::clone(&self.curp) } - - /// Handle `SyncAction` - /// If no longer need to sync to this node, return true - async fn handle_sync_action( - sync_action: SyncAction, - hb_opt: &mut bool, - is_shutdown_state: bool, - ae_fail_count: &mut u32, - connect: &(impl InnerConnectApi + ?Sized), - curp: &RawCurp, - ) -> bool { - let connect_id = connect.id(); - match sync_action { - SyncAction::AppendEntries(ae) => { - let is_empty = ae.entries.is_empty(); - let is_commit_shutdown = ae.entries.last().is_some_and(|e| { - matches!(e.entry_data, EntryData::Shutdown) && e.index == ae.leader_commit - }); - // (hb_opt, entries) status combination - // (false, empty) => send heartbeat to followers - // (true, empty) => indicates that `batch_timeout` expired, and during this - // period there is not any log generated. Do nothing - // (true | false, not empty) => send append entries - if !*hb_opt || !is_empty { - match Self::send_ae(connect, curp, ae).await { - Ok((true, _)) => return true, - Ok((false, ae_succeed)) => { - if ae_succeed { - *hb_opt = true; - if curp - .get_transferee() - .is_some_and(|transferee| transferee == connect_id) - && curp - .get_match_index(connect_id) - .is_some_and(|idx| idx == curp.last_log_index()) - { - if let Err(e) = connect - .try_become_leader_now(curp.cfg().wait_synced_timeout) - .await - { - warn!( - "{} send try become leader now to {} failed: {:?}", - curp.id(), - connect_id, - e - ); - }; - } - } else { - debug!("ae rejected by {}", connect.id()); - } - // Check Follower shutdown - // When the leader is in the shutdown state, its last log must be - // shutdown, and if the follower is - // already synced with leader and current AE is a heartbeat, then the - // follower will commit the shutdown - // log after AE, or when the follower is not synced with the leader, the - // current AE 
will send and directly commit - // shutdown log. - if is_shutdown_state - && ((curp.is_synced(connect_id) && is_empty) - || (!curp.is_synced(connect_id) && is_commit_shutdown)) - { - if let Err(e) = connect.trigger_shutdown().await { - warn!("trigger shutdown to {} failed, {e}", connect_id); - } else { - debug!("trigger shutdown to {} success", connect_id); - } - return true; - } - } - Err(err) => { - if is_empty { - metrics::get().heartbeat_send_failures.add(1, &[]); - } - warn!("ae to {} failed, {err:?}", connect.id()); - if is_shutdown_state { - *ae_fail_count = ae_fail_count.overflow_add(1); - if *ae_fail_count >= 5 { - warn!("the follower {} may have been shutdown", connect_id); - return true; - } - } - } - }; - } - } - SyncAction::Snapshot(rx) => match rx.await { - Ok(snapshot) => match Self::send_snapshot(connect, curp, snapshot).await { - Ok(true) => return true, - Err(err) => warn!("snapshot to {} failed, {err:?}", connect.id()), - Ok(false) => {} - }, - Err(err) => { - warn!("failed to receive snapshot result, {err}"); - } - }, - } - false - } } impl, RC: RoleChange> Debug for CurpNode { @@ -1126,12 +877,7 @@ impl, RC: RoleChange> Debug for CurpNode RawCurp { Ok((term, truncate_at, to_persist)) } - /// Handle `append_entries` response - /// Return `Ok(ae_succeeded)` - /// Return `Err(())` if self is no longer the leader - pub(super) fn handle_append_entries_resp( - &self, - follower_id: ServerId, - last_sent_index: Option, // None means the ae is a heartbeat - term: u64, - success: bool, - hint_index: LogIndex, - ) -> Result { - // validate term - let (cur_term, cur_role) = self.st.map_read(|st_r| (st_r.term, st_r.role)); - if cur_term < term { - let mut st_w = self.st.write(); - self.update_to_term_and_become_follower(&mut st_w, term); - return Err(()); - } - if cur_role != Role::Leader { - return Err(()); - } - - if !success { - self.ctx - .node_states - .update_next_index(follower_id, hint_index); - debug!( - "{} updates follower {}'s next_index to 
{hint_index} because it rejects ae", - self.id(), - follower_id, - ); - return Ok(false); - } - - // if ae is a heartbeat, return - let Some(last_sent_index) = last_sent_index else { - return Ok(true); - }; - - self.update_match_index(follower_id, last_sent_index); - - // check if commit_index needs to be updated - let log_r = self.log.upgradable_read(); - if self.can_update_commit_index_to(&log_r, last_sent_index, cur_term) { - let mut log_w = RwLockUpgradableReadGuard::upgrade(log_r); - if last_sent_index > log_w.commit_index { - log_w.commit_to(last_sent_index); - self.update_membership_state(None, None, Some(last_sent_index)); - debug!("{} updates commit index to {last_sent_index}", self.id()); - self.apply(&mut *log_w); - } - } - - Ok(true) - } - /// Check if `commit_index` needs to be updated pub(super) fn try_update_commit_index(&self, index: LogIndex, term: u64) { let log_r = self.log.upgradable_read(); @@ -1123,29 +1066,6 @@ impl RawCurp { validate } - /// Handle `install_snapshot` resp - /// Return Err(()) if the current node isn't a leader or current term is - /// less than the given term - pub(super) fn handle_snapshot_resp( - &self, - follower_id: ServerId, - meta: SnapshotMeta, - term: u64, - ) -> Result<(), ()> { - // validate term - let (cur_term, cur_role) = self.st.map_read(|st_r| (st_r.term, st_r.role)); - if cur_term < term { - let mut st_w = self.st.write(); - self.update_to_term_and_become_follower(&mut st_w, term); - return Err(()); - } - if cur_role != Role::Leader { - return Err(()); - } - self.update_match_index(follower_id, meta.last_included_index.numeric_cast()); - Ok(()) - } - /// Handle `fetch_read_state` pub(super) fn handle_fetch_read_state(&self, cmd: Arc) -> ReadState { let ids: Vec<_> = self @@ -1288,62 +1208,6 @@ impl RawCurp { ms_r.cluster().committed().clone() } - /// Get `append_entries` request for `follower_id` that contains the latest - /// log entries - pub(super) fn sync(&self, follower_id: ServerId) -> Option> { - let 
term = { - let st_r = self.st.read(); - if st_r.role != Role::Leader { - return None; - } - st_r.term - }; - - let Some(next_index) = self.ctx.node_states.get_next_index(follower_id) else { - warn!( - "follower {} is not found, it maybe has been removed", - follower_id - ); - return None; - }; - let log_r = self.log.read(); - if next_index <= log_r.base_index { - // the log has already been compacted - let entry = log_r.get(log_r.last_exe).unwrap_or_else(|| { - unreachable!( - "log entry {} should not have been compacted yet, needed for snapshot", - log_r.last_as - ) - }); - // TODO: buffer a local snapshot: if a follower is down for a long time, - // the leader will take a snapshot itself every time `sync` is called in effort - // to calibrate it. Since taking a snapshot will block the leader's - // execute workers, we should not take snapshot so often. A better - // solution would be to keep a snapshot cache. - let meta = SnapshotMeta { - last_included_index: entry.index, - last_included_term: entry.term, - }; - let (tx, rx) = oneshot::channel(); - if let Err(e) = self.ctx.as_tx.send(TaskType::Snapshot(meta, tx)) { - error!("failed to send task to after sync: {e}"); - } - Some(SyncAction::Snapshot(rx)) - } else { - let (prev_log_index, prev_log_term) = log_r.get_prev_entry_info(next_index); - let entries = log_r.get_from(next_index); - let ae = AppendEntries { - term, - leader_id: self.id(), - prev_log_index, - prev_log_term, - leader_commit: log_r.commit_index, - entries, - }; - Some(SyncAction::AppendEntries(ae)) - } - } - /// Get `append_entries` request for `follower_id` that contains the latest /// log entries pub(super) fn sync_from(&self, next_index: LogIndex) -> SyncAction { @@ -1397,11 +1261,6 @@ impl RawCurp { self.st.read().role == Role::Leader } - /// Get leader event - pub(super) fn leader_event(&self) -> Arc { - Arc::clone(&self.ctx.leader_event) - } - /// Reset log base pub(super) fn reset_by_snapshot(&self, meta: SnapshotMeta) { let mut log_w = 
self.log.write(); @@ -1456,16 +1315,6 @@ impl RawCurp { Arc::clone(&self.task_manager) } - /// Check if the specified follower has caught up with the leader - pub(super) fn is_synced(&self, node_id: ServerId) -> bool { - let log_r = self.log.read(); - let leader_commit_index = log_r.commit_index; - self.ctx - .node_states - .get_match_index(node_id) - .is_some_and(|match_index| match_index == leader_commit_index) - } - /// Get rpc connect connects by ids pub(super) fn connects<'a, Ids: IntoIterator>( &self, @@ -1474,14 +1323,6 @@ impl RawCurp { self.ctx.node_states.connects(ids) } - /// Get all connects - pub(super) fn with_member_connects(&self, mut op: F) -> R - where - F: FnMut(&BTreeMap) -> R, - { - op(&self.ctx.node_states.all_connects()) - } - /// Get voters connects pub(super) fn voters_connects(&self) -> BTreeMap> { let voters = self.ms.map_read(|ms| ms.members_ids()); @@ -1498,11 +1339,6 @@ impl RawCurp { self.lst.get_transferee() } - /// Get match index of a node - pub(super) fn get_match_index(&self, id: ServerId) -> Option { - self.ctx.node_states.get_match_index(id) - } - /// Get last log index pub(super) fn last_log_index(&self) -> u64 { self.log.read().last_log_index() diff --git a/crates/curp/src/server/raw_curp/node_state.rs b/crates/curp/src/server/raw_curp/node_state.rs index f769c021b..3c7451b5d 100644 --- a/crates/curp/src/server/raw_curp/node_state.rs +++ b/crates/curp/src/server/raw_curp/node_state.rs @@ -90,6 +90,8 @@ impl NodeStates { warn!("follower {} is not found, it maybe has been removed", id); }; } + + #[cfg(test)] /// Get `next_index` for server pub(super) fn get_next_index(&self, id: u64) -> Option { let states_r = self.states.read(); @@ -153,16 +155,6 @@ impl NodeStates { .into_iter() } - /// Get all rpc connects - pub(super) fn all_connects(&self) -> BTreeMap { - let states_r = self.states.read(); - states_r - .keys() - .copied() - .zip(states_r.values().map(NodeState::connect).cloned()) - .collect() - } - /// Get all node states 
pub(super) fn all_states(&self) -> BTreeMap { self.states.read().clone() @@ -227,19 +219,8 @@ impl NodeState { pub(super) fn status_mut(&mut self) -> &mut NodeStatus { &mut self.status } - - /// Decomposes the `NodeState` into its constituent parts. - pub(crate) fn into_parts(self) -> (InnerConnectApiWrapper, Arc, Arc) { - let NodeState { - connect, - sync_event, - remove_event, - .. - } = self; - - (connect, sync_event, remove_event) - } } + #[cfg(test)] mod tests { use utils::parking_lot_lock::RwLockMap; diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index fd6bb0a04..9942068c0 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -213,6 +213,7 @@ fn follower_handle_propose_will_reject_conflicted() { /*************** tests for append_entries(heartbeat) **************/ +#[cfg(ignore)] // TODO: rewrite this test #[traced_test] #[test] fn heartbeat_will_calibrate_term() { @@ -228,6 +229,7 @@ fn heartbeat_will_calibrate_term() { assert_eq!(st_r.role, Role::Follower); } +#[cfg(ignore)] // TODO: rewrite this test #[traced_test] #[test] fn heartbeat_will_calibrate_next_index() { @@ -243,6 +245,7 @@ fn heartbeat_will_calibrate_next_index() { assert_eq!(curp.ctx.node_states.get_next_index(s1_id), Some(1)); } +#[cfg(ignore)] // TODO: rewrite this test #[traced_test] #[test] fn handle_ae_will_calibrate_term() { @@ -653,6 +656,7 @@ fn follower_handle_shutdown_will_reject() { )); } +#[cfg(ignore)] // TODO: rewrite this test #[traced_test] #[test] fn is_synced_should_return_true_when_followers_caught_up_with_leader() { From 137a19fb1e129a2f118705eb88182db7120079c2 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 17 Oct 2024 10:34:00 +0800 Subject: [PATCH 258/322] refactor: enable new replication implementation Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- Cargo.lock | 5 +- crates/curp/Cargo.toml | 1 + 
.../curp/src/server/curp_node/member_impl.rs | 2 + crates/curp/src/server/curp_node/mod.rs | 52 +++++--- .../curp/src/server/curp_node/replication.rs | 111 +++++++++--------- 5 files changed, 100 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8a276146a..a0f313e9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -663,6 +663,7 @@ dependencies = [ "futures", "indexmap 2.2.6", "itertools 0.13.0", + "lazy_static", "madsim", "madsim-tokio", "madsim-tonic", @@ -1479,9 +1480,9 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "lazycell" diff --git a/crates/curp/Cargo.toml b/crates/curp/Cargo.toml index ced3646fc..0da671f5f 100644 --- a/crates/curp/Cargo.toml +++ b/crates/curp/Cargo.toml @@ -28,6 +28,7 @@ fs2 = "0.4.3" futures = "0.3.21" indexmap = "2.2.6" itertools = "0.13" +lazy_static = "1.5.0" madsim = { version = "0.2.27", features = ["rpc", "macros"] } opentelemetry = { version = "0.24.0", features = ["metrics"] } parking_lot = "0.12.3" diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 88c9f482f..a4a930f17 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -6,6 +6,7 @@ use std::collections::BTreeMap; use std::collections::HashSet; +use std::sync::Arc; use curp_external_api::cmd::Command; use curp_external_api::cmd::CommandExecutor; @@ -96,6 +97,7 @@ impl, RC: RoleChange> CurpNode { self.update_states_with_membership(&config); self.curp .update_membership_state(None, Some((index, config)), None); + Self::respawn_replication(Arc::clone(&self.curp)); self.curp.persistent_membership_state()?; // Leader also needs to update transferee 
self.curp.update_transferee(); diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index ee3f05549..0cb7ea5d6 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -136,8 +136,6 @@ pub(super) struct CurpNode, RC: RoleChange> { as_tx: flume::Sender>, /// Tx to send to propose task propose_tx: flume::Sender>, - /// All handles of the replication tasks - replication_handles: Mutex, } /// Handlers for clients @@ -630,14 +628,24 @@ impl, RC: RoleChange> CurpNode { // bcast pre vote or vote, if it is a pre vote and success, it will return // Some(vote) then we need to bcast normal vote, and bcast // normal vote always return None - if let Some(vote) = Self::bcast_vote(curp.as_ref(), pre_vote_or_vote.clone()).await + if let BCastVoteResult::PreVoteSuccess(vote) = + Self::bcast_vote(curp.as_ref(), pre_vote_or_vote.clone()).await { debug_assert!( !vote.is_pre_vote, "bcast pre vote should return Some(normal_vote)" ); - let opt = Self::bcast_vote(curp.as_ref(), vote).await; - debug_assert!(opt.is_none(), "bcast normal vote should always return None"); + let result = Self::bcast_vote(curp.as_ref(), vote).await; + debug_assert!( + matches!( + result, + BCastVoteResult::VoteSuccess | BCastVoteResult::VoteFail + ), + "bcast normal vote should always return Vote variants" + ); + if matches!(result, BCastVoteResult::VoteSuccess) { + Self::respawn_replication(Arc::clone(&curp)); + } } } } @@ -756,6 +764,10 @@ impl, RC: RoleChange> CurpNode { as_rx, ); + if is_leader { + Self::respawn_replication(Arc::clone(&curp)); + } + Ok(Self { curp, cmd_board, @@ -764,7 +776,6 @@ impl, RC: RoleChange> CurpNode { cmd_executor, as_tx, propose_tx, - replication_handles: Mutex::default(), }) } @@ -790,12 +801,7 @@ impl, RC: RoleChange> CurpNode { } /// Candidate or pre candidate broadcasts votes - /// - /// # Returns - /// - /// - `Some(vote)` if bcast pre vote and success - /// - `None` if bcast pre vote and 
fail or bcast vote - async fn bcast_vote(curp: &RawCurp, vote: Vote) -> Option { + async fn bcast_vote(curp: &RawCurp, vote: Vote) -> BCastVoteResult { let self_id = curp.id(); if vote.is_pre_vote { debug!("{self_id} broadcasts pre votes to all servers"); @@ -833,12 +839,12 @@ impl, RC: RoleChange> CurpNode { if vote.is_pre_vote { if resp.shutdown_candidate { curp.task_manager().shutdown(false).await; - return None; + return BCastVoteResult::PreVoteFail; } let result = curp.handle_pre_vote_resp(id, resp.term, resp.vote_granted); match result { Ok(None) | Err(()) => {} - Ok(Some(v)) => return Some(v), + Ok(Some(v)) => return BCastVoteResult::PreVoteSuccess(v), } } else { // collect follower spec pool @@ -853,11 +859,13 @@ impl, RC: RoleChange> CurpNode { curp.handle_vote_resp(id, resp.term, resp.vote_granted, follower_spec_pool); match result { Ok(false) => {} - Ok(true) | Err(()) => return None, + Ok(true) => return BCastVoteResult::VoteSuccess, + Err(()) => return BCastVoteResult::VoteFail, } }; } - None + + BCastVoteResult::PreVoteFail } /// Get `RawCurp` @@ -875,6 +883,18 @@ impl, RC: RoleChange> Debug for CurpNode>, -} - -impl Handles { - /// Abort all replication tasks - fn abort_all(&mut self) -> impl Future { - for handle in &self.inner { - handle.abort(); - } - join_all(self.inner.drain(..)).map(|results| { - debug!("aborted replication tasks, results: {results:?}"); - }) - } - - /// Replace with new handles - fn replace_with(&mut self, handles: impl IntoIterator>) { - self.inner.extend(handles); - } +// TODO: replace `lazy_static` with `LazyLock` after Rust version 1.80.0 +lazy_static::lazy_static! 
{ + /// Replication handles + static ref HANDLES: Mutex>> = Mutex::new(Vec::new()); } /// Represents various actions that can be performed on the `RawCurp` state machine @@ -78,11 +59,14 @@ impl, RC: RoleChange> CurpNode { /// This method must be called under the following conditions: /// * When a new leader is elected /// * When membership changes - pub(super) async fn respawn_replication(&self) { - let self_id = self.curp.id(); - let cfg = self.curp.cfg().clone(); - let self_term = self.curp.term(); - let mut node_states = self.curp.all_node_states(); + pub(super) fn respawn_replication(curp: Arc>) { + /// The size of the action channel + const ACTION_CHANNEL_SIZE: usize = 0x1000; + + let self_id = curp.id(); + let cfg = curp.cfg().clone(); + let self_term = curp.term(); + let mut node_states = curp.all_node_states(); // we don't needs to sync to self let _ignore = node_states.remove(&self_id); let connects = node_states @@ -90,13 +74,9 @@ impl, RC: RoleChange> CurpNode { .map(NodeState::connect) .cloned() .collect(); - let self_next_index = self.curp.last_log_index() + 1; - // TODO: use bounded - let (action_tx, action_rx) = flume::unbounded(); - let curp = Arc::clone(&self.curp); - self.replication_handles - .map_lock(|mut h| h.abort_all()) - .await; + let self_next_index = curp.last_log_index() + 1; + let (action_tx, action_rx) = flume::bounded(ACTION_CHANNEL_SIZE); + HANDLES.lock().iter().for_each(JoinHandle::abort); let state_handle = tokio::spawn(Self::state_machine_worker(curp, action_rx)); let heartbeat_handle = tokio::spawn(Self::heartbeat_worker( @@ -117,12 +97,10 @@ impl, RC: RoleChange> CurpNode { self_next_index, )) }); - - self.replication_handles.lock().replace_with( - replication_handles - .chain([state_handle]) - .chain([heartbeat_handle]), - ); + *HANDLES.lock() = replication_handles + .chain([state_handle]) + .chain([heartbeat_handle]) + .collect(); } /// A worker responsible for synchronizing data with the curp state machine @@ -176,14 +154,37 
@@ impl, RC: RoleChange> CurpNode { loop { let _inst = ticker.tick().await; for connect in &connects { - if let Err(err) = connect.append_entries(heartbeat.into(), timeout).await { - warn!("heartbeat to {} failed, {err:?}", connect.id()); - metrics::get().heartbeat_send_failures.add(1, &[]); + let result = Self::send_heartbeat(connect, heartbeat, timeout).await; + match result { + Ok(other_term) if self_term < other_term => { + info!("heartbeat worker exiting"); + return; + } + Err(err) => { + warn!("heartbeat to {} failed, {err:?}", connect.id()); + metrics::get().heartbeat_send_failures.add(1, &[]); + } + Ok(_) => {} } } } } + /// Send the heartbeat to the give node, returns the term of that node + async fn send_heartbeat( + connect: &InnerConnectApiWrapper, + heartbeat: Heartbeat, + timeout: Duration, + ) -> Result { + debug!("sending heartbeat to: {}", connect.id()); + connect + .append_entries(heartbeat.into(), timeout) + .await + .map(Response::into_inner) + .map(|resp| resp.term) + .map_err(Into::into) + } + /// A worker responsible for appending log entries to other nodes in the cluster async fn replication_worker( node_state: NodeState, @@ -208,8 +209,14 @@ impl, RC: RoleChange> CurpNode { loop { let _ignore = tokio::time::timeout(batch_timeout, sync_event.listen()).await; let (tx, rx) = oneshot::channel(); - if let Err(err) = action_tx.send(Action::GetLogFrom((next_index, tx))) { - error!("action_rx unexpectedly closed: {err}"); + if action_tx + .send(Action::GetLogFrom((next_index, tx))) + .is_err() + { + debug!( + "action_rx closed because the leader stepped down, exiting replication worker" + ); + break; } let action = match rx.await { @@ -229,9 +236,7 @@ impl, RC: RoleChange> CurpNode { if let Action::UpdateNextIndex((_, index)) = action { next_index = index; } - if let Err(err) = action_tx.send(action) { - error!("action_rx was accidentally dropped: {err}"); - } + let __ignore = action_tx.send(action); } } } @@ -307,7 +312,7 @@ impl, RC: RoleChange> 
CurpNode { .map_err(|err| warn!("failed to receive snapshot result, {err}")) .ok()?; let last_include_index = snapshot.meta.last_included_index; - Self::send_snapshot1(connect, snapshot, self_id, self_term) + Self::send_snapshot(connect, snapshot, self_id, self_term) .await .map(|resp| Self::snapshot_action(resp, connect.id(), self_term, last_include_index)) .map_err(|err| warn!("snapshot to {} failed, {err:?}", connect.id())) @@ -315,7 +320,7 @@ impl, RC: RoleChange> CurpNode { } /// Send snapshot - async fn send_snapshot1( + async fn send_snapshot( connect: &InnerConnectApiWrapper, snapshot: Snapshot, self_id: u64, From 24fbbcaa38b0626b8a14b3e24246f5798874cc7b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:13:29 +0800 Subject: [PATCH 259/322] chore: move replication state update logic to `RawCurp` Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/replication.rs | 125 +++++------------- crates/curp/src/server/raw_curp/mod.rs | 7 +- .../curp/src/server/raw_curp/replication.rs | 96 ++++++++++++++ 3 files changed, 136 insertions(+), 92 deletions(-) create mode 100644 crates/curp/src/server/raw_curp/replication.rs diff --git a/crates/curp/src/server/curp_node/replication.rs b/crates/curp/src/server/curp_node/replication.rs index c0b19b415..e3620c579 100644 --- a/crates/curp/src/server/curp_node/replication.rs +++ b/crates/curp/src/server/curp_node/replication.rs @@ -17,7 +17,9 @@ use crate::{ }, server::{ metrics, - raw_curp::{node_state::NodeState, AppendEntries, Heartbeat, SyncAction}, + raw_curp::{ + node_state::NodeState, replication::Action, AppendEntries, Heartbeat, SyncAction, + }, RawCurp, }, snapshot::Snapshot, @@ -31,25 +33,6 @@ lazy_static::lazy_static! { static ref HANDLES: Mutex>> = Mutex::new(Vec::new()); } -/// Represents various actions that can be performed on the `RawCurp` state machine -enum Action { - /// Update the match index for a given node. 
- /// Contains (node_id, match_index) - UpdateMatchIndex((u64, LogIndex)), - - /// Update the next index for a given node. - /// Contains (node_id, next_index) - UpdateNextIndex((u64, LogIndex)), - - /// Request to get the log starting from a specific index. - /// Contains a tuple with the starting log index and a sender to send the sync action. - GetLogFrom((LogIndex, oneshot::Sender>)), - - /// Step down the current node. - /// Contains the latest term. - StepDown(u64), -} - impl, RC: RoleChange> CurpNode { #[allow(clippy::arithmetic_side_effects)] // a log index(u64) should never overflow /// Respawn replication tasks base on current node states @@ -80,6 +63,7 @@ impl, RC: RoleChange> CurpNode { let state_handle = tokio::spawn(Self::state_machine_worker(curp, action_rx)); let heartbeat_handle = tokio::spawn(Self::heartbeat_worker( + action_tx.clone(), connects, cfg.clone(), self_id, @@ -109,31 +93,11 @@ impl, RC: RoleChange> CurpNode { action_rx: flume::Receiver>, ) { // As we spawn the workers on every leader update, the term remains consistent - let self_term = curp.term(); - while let Ok(update) = action_rx.recv_async().await { - match update { - Action::UpdateMatchIndex((node_id, index)) => { - debug!("updating {node_id}'s match index to {index}"); - curp.update_match_index(node_id, index); - curp.try_update_commit_index(index, self_term); - } - Action::UpdateNextIndex((node_id, index)) => { - debug!("updating {node_id}'s next index to {index}"); - curp.update_next_index(node_id, index); - } - Action::GetLogFrom((next, tx)) => { - debug!("getting log from index {next}"); - let sync = curp.sync_from(next); - if tx.send(sync).is_err() { - error!("send append entries failed"); - } - } - Action::StepDown(node_term) => { - debug_assert!(node_term > self_term, "node_term no greater than self_term"); - info!("received greater term: {node_term}, stepping down."); - curp.step_down(node_term); - break; - } + while let Ok(action) = action_rx.recv_async().await { + let 
exit = matches!(action, Action::StepDown(_)); + curp.sync_state_machine(action); + if exit { + break; } } // tx dropped, exit @@ -142,6 +106,7 @@ impl, RC: RoleChange> CurpNode { /// A worker responsible for sending heartbeat to the cluster async fn heartbeat_worker( + action_tx: flume::Sender>, connects: Vec, cfg: CurpConfig, self_id: u64, @@ -154,17 +119,18 @@ impl, RC: RoleChange> CurpNode { loop { let _inst = ticker.tick().await; for connect in &connects { - let result = Self::send_heartbeat(connect, heartbeat, timeout).await; + let result = Self::send_heartbeat(connect, heartbeat, self_term, timeout).await; match result { - Ok(other_term) if self_term < other_term => { + Ok(Some(action)) => { + let _ignore = action_tx.send(action); info!("heartbeat worker exiting"); return; } + Ok(None) => {} Err(err) => { warn!("heartbeat to {} failed, {err:?}", connect.id()); metrics::get().heartbeat_send_failures.add(1, &[]); } - Ok(_) => {} } } } @@ -174,14 +140,15 @@ impl, RC: RoleChange> CurpNode { async fn send_heartbeat( connect: &InnerConnectApiWrapper, heartbeat: Heartbeat, + self_term: u64, timeout: Duration, - ) -> Result { + ) -> Result>, CurpError> { debug!("sending heartbeat to: {}", connect.id()); connect .append_entries(heartbeat.into(), timeout) .await .map(Response::into_inner) - .map(|resp| resp.term) + .map(|resp| RawCurp::::heartbeat_action(resp.term, self_term)) .map_err(Into::into) } @@ -255,7 +222,16 @@ impl, RC: RoleChange> CurpNode { } Self::send_append_entries(connect, ae, rpc_timeout, self_id) .await - .map(|resp| Self::append_entries_action(resp, ae, connect.id(), self_term)) + .map(|resp| { + RawCurp::::append_entries_action( + resp.term, + resp.success, + resp.hint_index, + ae, + connect.id(), + self_term, + ) + }) .map_err(|err| warn!("ae to {} failed, {err:?}", connect.id())) .ok() } @@ -276,30 +252,6 @@ impl, RC: RoleChange> CurpNode { .map_err(Into::into) } - #[allow(clippy::as_conversions, clippy::arithmetic_side_effects)] // converting 
usize to u64 is safe - /// Generate `Action` from append entries response - fn append_entries_action( - resp: AppendEntriesResponse, - ae: &AppendEntries, - node_id: u64, - self_term: u64, - ) -> Action { - let other_term = resp.term; - let success = resp.success; - let hint_index = resp.hint_index; - - if self_term < other_term { - return Action::StepDown(other_term); - } - - if !success { - return Action::UpdateNextIndex((node_id, hint_index)); - } - - let last_sent_index = ae.prev_log_index + ae.entries.len() as u64; - Action::UpdateMatchIndex((node_id, last_sent_index)) - } - /// Handle snapshot async fn handle_snapshot( rx: oneshot::Receiver, @@ -314,7 +266,14 @@ impl, RC: RoleChange> CurpNode { let last_include_index = snapshot.meta.last_included_index; Self::send_snapshot(connect, snapshot, self_id, self_term) .await - .map(|resp| Self::snapshot_action(resp, connect.id(), self_term, last_include_index)) + .map(|resp| { + RawCurp::::snapshot_action( + resp.term, + connect.id(), + self_term, + last_include_index, + ) + }) .map_err(|err| warn!("snapshot to {} failed, {err:?}", connect.id())) .ok() } @@ -332,18 +291,4 @@ impl, RC: RoleChange> CurpNode { .map(Response::into_inner) .map_err(Into::into) } - - /// Generate `Action` from snapshot response - fn snapshot_action( - resp: InstallSnapshotResponse, - node_id: u64, - self_term: u64, - last_include_index: LogIndex, - ) -> Action { - let other_term = resp.term; - if self_term < other_term { - return Action::StepDown(other_term); - } - Action::UpdateMatchIndex((node_id, last_include_index)) - } } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 357a34a03..69f809449 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -104,6 +104,9 @@ pub(crate) mod node_state; /// Node monitor implementation mod monitor; +/// Log replication implementation +pub(crate) mod replication; + /// The curp state machine pub struct RawCurp 
{ /// Curp state @@ -258,7 +261,7 @@ impl Debug for RawCurp { } /// Actions of syncing -pub(super) enum SyncAction { +pub(crate) enum SyncAction { /// Use append entries to calibrate AppendEntries(AppendEntries), /// Use snapshot to calibrate @@ -311,7 +314,7 @@ impl From for crate::rpc::AppendEntriesRequest { } /// Invoked by leader to replicate log entries; also used as heartbeat -pub(super) struct AppendEntries { +pub(crate) struct AppendEntries { /// Leader's term pub(super) term: u64, /// Leader's id diff --git a/crates/curp/src/server/raw_curp/replication.rs b/crates/curp/src/server/raw_curp/replication.rs new file mode 100644 index 000000000..be9b8028b --- /dev/null +++ b/crates/curp/src/server/raw_curp/replication.rs @@ -0,0 +1,96 @@ +use curp_external_api::{cmd::Command, role_change::RoleChange, LogIndex}; +use tokio::sync::oneshot; +use tracing::{debug, error, info}; + +use crate::rpc::{AppendEntriesResponse, InstallSnapshotResponse}; + +use super::{AppendEntries, RawCurp, SyncAction}; + +/// Represents various actions that can be performed on the `RawCurp` state machine +pub(crate) enum Action { + /// Update the match index for a given node. + /// Contains (node_id, match_index) + UpdateMatchIndex((u64, LogIndex)), + + /// Update the next index for a given node. + /// Contains (node_id, next_index) + UpdateNextIndex((u64, LogIndex)), + + /// Request to get the log starting from a specific index. + /// Contains a tuple with the starting log index and a sender to send the sync action. + GetLogFrom((LogIndex, oneshot::Sender>)), + + /// Step down the current node. + /// Contains the latest term. 
+ StepDown(u64), +} + +impl RawCurp { + /// Synchronizes a action + pub(crate) fn sync_state_machine(&self, action: Action) { + let self_term = self.term(); + match action { + Action::UpdateMatchIndex((node_id, index)) => { + debug!("updating {node_id}'s match index to {index}"); + self.update_match_index(node_id, index); + self.try_update_commit_index(index, self_term); + } + Action::UpdateNextIndex((node_id, index)) => { + debug!("updating {node_id}'s next index to {index}"); + self.update_next_index(node_id, index); + } + Action::GetLogFrom((next, tx)) => { + debug!("getting log from index {next}"); + let sync = self.sync_from(next); + if tx.send(sync).is_err() { + error!("send append entries failed"); + } + } + Action::StepDown(node_term) => { + debug_assert!(node_term > self_term, "node_term no greater than self_term"); + info!("received greater term: {node_term}, stepping down."); + self.step_down(node_term); + } + } + } + + /// Generate `Action` from heartbeat response + pub(crate) fn heartbeat_action(other_term: u64, self_term: u64) -> Option> { + (self_term < other_term).then_some(Action::StepDown(other_term)) + } + + #[allow(clippy::as_conversions, clippy::arithmetic_side_effects)] // converting usize to u64 is safe + /// Generate `Action` from append entries response + pub(crate) fn append_entries_action( + other_term: u64, + success: bool, + hint_index: LogIndex, + ae: &AppendEntries, + node_id: u64, + self_term: u64, + ) -> Action { + if self_term < other_term { + return Action::StepDown(other_term); + } + + if !success { + return Action::UpdateNextIndex((node_id, hint_index)); + } + + let last_sent_index = ae.prev_log_index + ae.entries.len() as u64; + Action::UpdateMatchIndex((node_id, last_sent_index)) + } + + /// Generate `Action` from snapshot response + pub(crate) fn snapshot_action( + other_term: u64, + node_id: u64, + self_term: u64, + last_include_index: LogIndex, + ) -> Action { + if self_term < other_term { + return 
Action::StepDown(other_term); + } + Action::UpdateMatchIndex((node_id, last_include_index)) + } +} From 5e535df9bfd8d8fdd2b976ffeeda15f051678f4a Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 17 Oct 2024 16:01:38 +0800 Subject: [PATCH 260/322] test: rewrite tests for log replication Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/raw_curp/replication.rs | 142 +++++++++++++++++- crates/curp/src/server/raw_curp/tests.rs | 52 ------- 2 files changed, 140 insertions(+), 54 deletions(-) diff --git a/crates/curp/src/server/raw_curp/replication.rs b/crates/curp/src/server/raw_curp/replication.rs index be9b8028b..351ef50d5 100644 --- a/crates/curp/src/server/raw_curp/replication.rs +++ b/crates/curp/src/server/raw_curp/replication.rs @@ -2,8 +2,6 @@ use curp_external_api::{cmd::Command, role_change::RoleChange, LogIndex}; use tokio::sync::oneshot; use tracing::{debug, error, info}; -use crate::rpc::{AppendEntriesResponse, InstallSnapshotResponse}; - use super::{AppendEntries, RawCurp, SyncAction}; /// Represents various actions that can be performed on the `RawCurp` state machine @@ -94,3 +92,143 @@ impl RawCurp { Action::UpdateMatchIndex((node_id, last_include_index)) } } + +#[cfg(test)] +mod test { + use std::sync::Arc; + + use curp_test_utils::{mock_role_change, test_cmd::TestCommand, TestRoleChange}; + use tracing_test::traced_test; + use utils::task_manager::TaskManager; + + use crate::server::raw_curp::Role; + + use super::*; + + type TestRawCurp = RawCurp; + + #[traced_test] + #[test] + fn replication_entries_will_calibrate_term() { + let curp = RawCurp::new_test(3, mock_role_change(), Arc::new(TaskManager::new())); + let ae = AppendEntries:: { + term: 1, + leader_id: 1, + prev_log_index: 2, + prev_log_term: 1, + leader_commit: 1, + entries: vec![], + }; + let action = TestRawCurp::append_entries_action(2, false, 1, &ae, 2, 1); + curp.sync_state_machine(action); + + let st_r = 
curp.st.read(); + assert_eq!(st_r.term, 2); + assert_eq!(st_r.role, Role::Follower); + } + + #[traced_test] + #[test] + fn heartbeat_will_calibrate_term() { + let curp = RawCurp::new_test(3, mock_role_change(), Arc::new(TaskManager::new())); + let action = TestRawCurp::heartbeat_action(2, 1).unwrap(); + curp.sync_state_machine(action); + + let st_r = curp.st.read(); + assert_eq!(st_r.term, 2); + assert_eq!(st_r.role, Role::Follower); + } + + #[traced_test] + #[test] + fn snapshot_will_calibrate_term() { + let curp = RawCurp::new_test(3, mock_role_change(), Arc::new(TaskManager::new())); + let action = TestRawCurp::snapshot_action(2, 1, 1, 1); + curp.sync_state_machine(action); + + let st_r = curp.st.read(); + assert_eq!(st_r.term, 2); + assert_eq!(st_r.role, Role::Follower); + } + + #[traced_test] + #[test] + fn snapshot_will_calibrate_index() { + let curp = RawCurp::new_test(3, mock_role_change(), Arc::new(TaskManager::new())); + + let s1_id = curp.get_id_by_name("S1").unwrap(); + assert_eq!(curp.ctx.node_states.get_match_index(s1_id), Some(0)); + + let action = TestRawCurp::snapshot_action(1, s1_id, 1, 1); + curp.sync_state_machine(action); + + let st_r = curp.st.read(); + assert_eq!(st_r.term, 1); + assert_eq!(curp.ctx.node_states.get_match_index(s1_id), Some(1)); + } + + #[traced_test] + #[test] + fn replication_entries_will_calibrate_next_index() { + let curp = RawCurp::new_test(3, mock_role_change(), Arc::new(TaskManager::new())); + + let s1_id = curp.get_id_by_name("S1").unwrap(); + assert_eq!(curp.ctx.node_states.get_next_index(s1_id), Some(1)); + + let ae = AppendEntries:: { + term: 1, + leader_id: 1, + prev_log_index: 1, + prev_log_term: 1, + leader_commit: 1, + entries: vec![], + }; + let action = TestRawCurp::append_entries_action(1, false, 2, &ae, s1_id, 1); + curp.sync_state_machine(action); + + let st_r = curp.st.read(); + assert_eq!(st_r.term, 1); + assert_eq!(curp.ctx.node_states.get_next_index(s1_id), Some(2)); + } + + #[traced_test] + #[test] + 
fn replication_entries_will_calibrate_match_index() { + let curp = RawCurp::new_test(3, mock_role_change(), Arc::new(TaskManager::new())); + + let s1_id = curp.get_id_by_name("S1").unwrap(); + assert_eq!(curp.ctx.node_states.get_match_index(s1_id), Some(0)); + + let ae = AppendEntries:: { + term: 1, + leader_id: 1, + prev_log_index: 1, + prev_log_term: 1, + leader_commit: 1, + entries: vec![], + }; + let action = TestRawCurp::append_entries_action(1, true, 2, &ae, s1_id, 1); + curp.sync_state_machine(action); + + let st_r = curp.st.read(); + assert_eq!(st_r.term, 1); + assert_eq!(curp.ctx.node_states.get_match_index(s1_id), Some(1)); + } + + #[traced_test] + #[test] + fn handle_ae_will_calibrate_term() { + let task_manager = Arc::new(TaskManager::new()); + let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; + curp.update_to_term_and_become_follower(&mut *curp.st.write(), 1); + let s2_id = curp.get_id_by_name("S2").unwrap(); + + let result = curp.handle_append_entries(2, s2_id, 0, 0, vec![], 0); + assert!(result.is_ok()); + + let st_r = curp.st.read(); + assert_eq!(st_r.term, 2); + assert_eq!(st_r.role, Role::Follower); + assert_eq!(st_r.leader_id, Some(s2_id)); + } +} diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 9942068c0..0fa7892a0 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -211,58 +211,6 @@ fn follower_handle_propose_will_reject_conflicted() { assert!(matches!(res, Err(CurpError::KeyConflict(())))); } -/*************** tests for append_entries(heartbeat) **************/ - -#[cfg(ignore)] // TODO: rewrite this test -#[traced_test] -#[test] -fn heartbeat_will_calibrate_term() { - let task_manager = Arc::new(TaskManager::new()); - let curp = { RawCurp::new_test(3, mock_role_change(), task_manager) }; - - let s1_id = curp.get_id_by_name("S1").unwrap(); - let result = curp.handle_append_entries_resp(s1_id, None, 2, false, 
1); - assert!(result.is_err()); - - let st_r = curp.st.read(); - assert_eq!(st_r.term, 2); - assert_eq!(st_r.role, Role::Follower); -} - -#[cfg(ignore)] // TODO: rewrite this test -#[traced_test] -#[test] -fn heartbeat_will_calibrate_next_index() { - let task_manager = Arc::new(TaskManager::new()); - let curp = RawCurp::new_test(3, mock_role_change(), task_manager); - - let s1_id = curp.get_id_by_name("S1").unwrap(); - let result = curp.handle_append_entries_resp(s1_id, None, 0, false, 1); - assert_eq!(result, Ok(false)); - - let st_r = curp.st.read(); - assert_eq!(st_r.term, 1); - assert_eq!(curp.ctx.node_states.get_next_index(s1_id), Some(1)); -} - -#[cfg(ignore)] // TODO: rewrite this test -#[traced_test] -#[test] -fn handle_ae_will_calibrate_term() { - let task_manager = Arc::new(TaskManager::new()); - let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; - curp.update_to_term_and_become_follower(&mut *curp.st.write(), 1); - let s2_id = curp.get_id_by_name("S2").unwrap(); - - let result = curp.handle_append_entries(2, s2_id, 0, 0, vec![], 0); - assert!(result.is_ok()); - - let st_r = curp.st.read(); - assert_eq!(st_r.term, 2); - assert_eq!(st_r.role, Role::Follower); - assert_eq!(st_r.leader_id, Some(s2_id)); -} - #[traced_test] #[test] fn handle_ae_will_set_leader_id() { From c8014a6e7dec0c382f17a7b7d964366a04dc1301 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 17 Oct 2024 16:46:17 +0800 Subject: [PATCH 261/322] chore: do not convert tonic::Status to CurpError in replication send methods Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/replication.rs | 46 +++++++++---------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/crates/curp/src/server/curp_node/replication.rs b/crates/curp/src/server/curp_node/replication.rs index e3620c579..c6dc9eb44 100644 --- a/crates/curp/src/server/curp_node/replication.rs +++ 
b/crates/curp/src/server/curp_node/replication.rs @@ -12,9 +12,7 @@ use tracing::{debug, error, info, warn}; use utils::config::CurpConfig; use crate::{ - rpc::{ - connect::InnerConnectApiWrapper, AppendEntriesResponse, CurpError, InstallSnapshotResponse, - }, + rpc::{connect::InnerConnectApiWrapper, AppendEntriesResponse, InstallSnapshotResponse}, server::{ metrics, raw_curp::{ @@ -119,18 +117,13 @@ impl, RC: RoleChange> CurpNode { loop { let _inst = ticker.tick().await; for connect in &connects { - let result = Self::send_heartbeat(connect, heartbeat, self_term, timeout).await; - match result { - Ok(Some(action)) => { - let _ignore = action_tx.send(action); - info!("heartbeat worker exiting"); - return; - } - Ok(None) => {} - Err(err) => { - warn!("heartbeat to {} failed, {err:?}", connect.id()); - metrics::get().heartbeat_send_failures.add(1, &[]); - } + if let Some(action) = + Self::send_heartbeat(connect, heartbeat, self_term, timeout).await + { + // step down + let _ignore = action_tx.send(action); + info!("heartbeat worker exiting"); + return; } } } @@ -142,14 +135,19 @@ impl, RC: RoleChange> CurpNode { heartbeat: Heartbeat, self_term: u64, timeout: Duration, - ) -> Result>, CurpError> { + ) -> Option> { debug!("sending heartbeat to: {}", connect.id()); connect .append_entries(heartbeat.into(), timeout) .await .map(Response::into_inner) .map(|resp| RawCurp::::heartbeat_action(resp.term, self_term)) - .map_err(Into::into) + .map_err(|err| { + warn!("heartbeat to {} failed, {err:?}", connect.id()); + metrics::get().heartbeat_send_failures.add(1, &[]); + }) + .ok() + .flatten() } /// A worker responsible for appending log entries to other nodes in the cluster @@ -232,8 +230,6 @@ impl, RC: RoleChange> CurpNode { self_term, ) }) - .map_err(|err| warn!("ae to {} failed, {err:?}", connect.id())) - .ok() } /// Send `append_entries` request @@ -242,14 +238,15 @@ impl, RC: RoleChange> CurpNode { ae: &AppendEntries, timeout: Duration, self_id: u64, - ) -> Result { + ) 
-> Option { debug!("{self_id} send append_entries to {}", connect.id()); connect .append_entries(ae.into(), timeout) .await .map(Response::into_inner) - .map_err(Into::into) + .map_err(|err| warn!("ae to {} failed, {err:?}", connect.id())) + .ok() } /// Handle snapshot @@ -274,8 +271,6 @@ impl, RC: RoleChange> CurpNode { last_include_index, ) }) - .map_err(|err| warn!("snapshot to {} failed, {err:?}", connect.id())) - .ok() } /// Send snapshot @@ -284,11 +279,12 @@ impl, RC: RoleChange> CurpNode { snapshot: Snapshot, self_id: u64, self_term: u64, - ) -> Result { + ) -> Option { connect .install_snapshot(self_term, self_id, snapshot) .await .map(Response::into_inner) - .map_err(Into::into) + .map_err(|err| warn!("snapshot to {} failed, {err:?}", connect.id())) + .ok() } } From f9a53ce47f215f6b726b6c2273a4d269fc13aac7 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:08:54 +0800 Subject: [PATCH 262/322] fix: do not use InnerConnectApi::id because it's deprecated Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/replication.rs | 58 +++++++++++-------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/crates/curp/src/server/curp_node/replication.rs b/crates/curp/src/server/curp_node/replication.rs index c6dc9eb44..9d51cd311 100644 --- a/crates/curp/src/server/curp_node/replication.rs +++ b/crates/curp/src/server/curp_node/replication.rs @@ -1,4 +1,4 @@ -use std::{sync::Arc, time::Duration}; +use std::{collections::BTreeMap, sync::Arc, time::Duration}; use curp_external_api::{ cmd::{Command, CommandExecutor}, @@ -50,10 +50,10 @@ impl, RC: RoleChange> CurpNode { let mut node_states = curp.all_node_states(); // we don't needs to sync to self let _ignore = node_states.remove(&self_id); - let connects = node_states - .values() - .map(NodeState::connect) - .cloned() + let connects: BTreeMap<_, _> = node_states + .keys() + .copied() + 
.zip(node_states.values().map(NodeState::connect).cloned()) .collect(); let self_next_index = curp.last_log_index() + 1; let (action_tx, action_rx) = flume::bounded(ACTION_CHANNEL_SIZE); @@ -71,6 +71,7 @@ impl, RC: RoleChange> CurpNode { let cfg = cfg.clone(); info!("spawning replication task for {id}"); tokio::spawn(Self::replication_worker( + id, state, action_tx.clone(), self_id, @@ -105,7 +106,7 @@ impl, RC: RoleChange> CurpNode { /// A worker responsible for sending heartbeat to the cluster async fn heartbeat_worker( action_tx: flume::Sender>, - connects: Vec, + connects: BTreeMap, cfg: CurpConfig, self_id: u64, self_term: u64, @@ -116,9 +117,9 @@ impl, RC: RoleChange> CurpNode { let heartbeat = Heartbeat::new(self_term, self_id); loop { let _inst = ticker.tick().await; - for connect in &connects { + for (id, connect) in &connects { if let Some(action) = - Self::send_heartbeat(connect, heartbeat, self_term, timeout).await + Self::send_heartbeat(*id, connect, heartbeat, self_term, timeout).await { // step down let _ignore = action_tx.send(action); @@ -131,19 +132,20 @@ impl, RC: RoleChange> CurpNode { /// Send the heartbeat to the give node, returns the term of that node async fn send_heartbeat( + id: u64, connect: &InnerConnectApiWrapper, heartbeat: Heartbeat, self_term: u64, timeout: Duration, ) -> Option> { - debug!("sending heartbeat to: {}", connect.id()); + debug!("sending heartbeat to: {id}"); connect .append_entries(heartbeat.into(), timeout) .await .map(Response::into_inner) .map(|resp| RawCurp::::heartbeat_action(resp.term, self_term)) .map_err(|err| { - warn!("heartbeat to {} failed, {err:?}", connect.id()); + warn!("heartbeat to {id} failed, {err:?}"); metrics::get().heartbeat_send_failures.add(1, &[]); }) .ok() @@ -152,6 +154,7 @@ impl, RC: RoleChange> CurpNode { /// A worker responsible for appending log entries to other nodes in the cluster async fn replication_worker( + node_id: u64, node_state: NodeState, action_tx: flume::Sender>, self_id: 
u64, @@ -186,10 +189,18 @@ impl, RC: RoleChange> CurpNode { let action = match rx.await { Ok(SyncAction::AppendEntries(ae)) => { - Self::handle_append_entries(&ae, connect, rpc_timeout, self_id, self_term).await + Self::handle_append_entries( + &ae, + node_id, + connect, + rpc_timeout, + self_id, + self_term, + ) + .await } Ok(SyncAction::Snapshot(rx)) => { - Self::handle_snapshot(rx, connect, self_id, self_term).await + Self::handle_snapshot(rx, node_id, connect, self_id, self_term).await } Err(err) => { error!("channel unexpectedly closed: {err}"); @@ -209,6 +220,7 @@ impl, RC: RoleChange> CurpNode { /// Handle append entries async fn handle_append_entries( ae: &AppendEntries, + node_id: u64, connect: &InnerConnectApiWrapper, rpc_timeout: Duration, self_id: u64, @@ -218,7 +230,7 @@ impl, RC: RoleChange> CurpNode { if ae.entries.is_empty() { return None; } - Self::send_append_entries(connect, ae, rpc_timeout, self_id) + Self::send_append_entries(node_id, connect, ae, rpc_timeout, self_id) .await .map(|resp| { RawCurp::::append_entries_action( @@ -226,7 +238,7 @@ impl, RC: RoleChange> CurpNode { resp.success, resp.hint_index, ae, - connect.id(), + node_id, self_term, ) }) @@ -234,24 +246,26 @@ impl, RC: RoleChange> CurpNode { /// Send `append_entries` request async fn send_append_entries( + node_id: u64, connect: &InnerConnectApiWrapper, ae: &AppendEntries, timeout: Duration, self_id: u64, ) -> Option { - debug!("{self_id} send append_entries to {}", connect.id()); + debug!("{self_id} send append_entries to {node_id}"); connect .append_entries(ae.into(), timeout) .await .map(Response::into_inner) - .map_err(|err| warn!("ae to {} failed, {err:?}", connect.id())) + .map_err(|err| warn!("ae to {node_id} failed, {err:?}")) .ok() } /// Handle snapshot async fn handle_snapshot( rx: oneshot::Receiver, + node_id: u64, connect: &InnerConnectApiWrapper, self_id: u64, self_term: u64, @@ -261,20 +275,16 @@ impl, RC: RoleChange> CurpNode { .map_err(|err| warn!("failed to 
receive snapshot result, {err}")) .ok()?; let last_include_index = snapshot.meta.last_included_index; - Self::send_snapshot(connect, snapshot, self_id, self_term) + Self::send_snapshot(node_id, connect, snapshot, self_id, self_term) .await .map(|resp| { - RawCurp::::snapshot_action( - resp.term, - connect.id(), - self_term, - last_include_index, - ) + RawCurp::::snapshot_action(resp.term, node_id, self_term, last_include_index) }) } /// Send snapshot async fn send_snapshot( + node_id: u64, connect: &InnerConnectApiWrapper, snapshot: Snapshot, self_id: u64, @@ -284,7 +294,7 @@ impl, RC: RoleChange> CurpNode { .install_snapshot(self_term, self_id, snapshot) .await .map(Response::into_inner) - .map_err(|err| warn!("snapshot to {} failed, {err:?}", connect.id())) + .map_err(|err| warn!("snapshot to {node_id} failed, {err:?}")) .ok() } } From cc9adac96724789ccd22a35a700e776fc9ebc547 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 17 Oct 2024 18:21:29 +0800 Subject: [PATCH 263/322] fix: leader should abort replication on step down Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/member_impl.rs | 6 ++++++ crates/curp/src/server/curp_node/replication.rs | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index a4a930f17..ec6fc1a14 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -104,6 +104,12 @@ impl, RC: RoleChange> CurpNode { self.wait_commit(Some(propose_id)).await; } + // leader step down + if !self.curp.is_leader() { + debug!("leader step down, aborting replication"); + Self::abort_replication(); + } + self.build_membership_response() } diff --git a/crates/curp/src/server/curp_node/replication.rs b/crates/curp/src/server/curp_node/replication.rs index 9d51cd311..c889d7dee 100644 --- 
a/crates/curp/src/server/curp_node/replication.rs +++ b/crates/curp/src/server/curp_node/replication.rs @@ -57,7 +57,7 @@ impl, RC: RoleChange> CurpNode { .collect(); let self_next_index = curp.last_log_index() + 1; let (action_tx, action_rx) = flume::bounded(ACTION_CHANNEL_SIZE); - HANDLES.lock().iter().for_each(JoinHandle::abort); + Self::abort_replication(); let state_handle = tokio::spawn(Self::state_machine_worker(curp, action_rx)); let heartbeat_handle = tokio::spawn(Self::heartbeat_worker( @@ -86,6 +86,11 @@ impl, RC: RoleChange> CurpNode { .collect(); } + /// Aborts all ongoing replication tasks + pub(super) fn abort_replication() { + HANDLES.lock().iter().for_each(JoinHandle::abort); + } + /// A worker responsible for synchronizing data with the curp state machine async fn state_machine_worker( curp: Arc>, From 994bf3ee1427d496fcd136ae49e8ff42bbc9c812 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 17 Oct 2024 23:43:23 +0800 Subject: [PATCH 264/322] fix: update role using latest membership Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/member_impl.rs | 2 +- crates/curp/src/server/raw_curp/member_impl.rs | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index ec6fc1a14..b38896ca7 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -185,7 +185,7 @@ impl, RC: RoleChange> CurpNode { pub(crate) fn update_states_with_membership(&self, membership: &Membership) { let connects = self.connect_other_nodes(membership); let _new_states = self.curp.update_node_states(connects); - self.curp.update_role(); + self.curp.update_role(membership); } /// Filter out membership log entries diff --git a/crates/curp/src/server/raw_curp/member_impl.rs 
b/crates/curp/src/server/raw_curp/member_impl.rs index 165188653..20f1f2254 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -90,19 +90,20 @@ impl RawCurp { } /// Updates the role of the node based on the current membership state - pub(crate) fn update_role(&self) { - let ms = self.ms.read(); + pub(crate) fn update_role(&self, membership: &Membership) { let mut st_w = self.st.write(); - if ms.is_self_member() { + if membership.contains_member(self.node_id()) { if matches!(st_w.role, Role::Learner) { st_w.role = Role::Follower; } - } else { st_w.role = Role::Learner; } // updates leader id - if st_w.leader_id.map_or(false, |id| !ms.is_member(id)) { + if st_w + .leader_id + .map_or(false, |id| !membership.contains_member(id)) + { st_w.leader_id = None; } } @@ -172,12 +173,12 @@ mod test { // remove from membership curp.update_membership_state(None, [(1, membership1.clone())], None); - curp.update_role(); + curp.update_role(&membership1); assert_eq!(curp.st.read().role, Role::Learner); // add back curp.update_membership_state(None, [(2, membership2.clone())], None); - curp.update_role(); + curp.update_role(&membership2); assert_eq!(curp.st.read().role, Role::Follower); } } From 467f9e2a8e0ef3e1555e908ada2707a2459c8a90 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 00:36:14 +0800 Subject: [PATCH 265/322] refactor: client retry --- crates/curp/src/client/cluster_state.rs | 4 + crates/curp/src/client/keep_alive.rs | 5 +- crates/curp/src/client/retry.rs | 117 ++++++++++++++---------- 3 files changed, 75 insertions(+), 51 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 48c24e855..01dedc9ba 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -22,6 +22,7 @@ pub(crate) trait ForEachServer { ) -> FuturesUnordered; } 
+#[allow(variant_size_differences)] // not an issue /// Cluster State #[derive(Debug, Clone)] pub(crate) enum ClusterState { @@ -29,6 +30,8 @@ pub(crate) enum ClusterState { Init(ClusterStateInit), /// Ready cluster state Full(ClusterStateFull), + /// Error state, containing the previous state + Errored(Box), } impl From for ClusterState { @@ -51,6 +54,7 @@ impl ForEachServer for ClusterState { match *self { ClusterState::Init(ref init) => init.for_each_server(f), ClusterState::Full(ref ready) => ready.for_each_server(f), + ClusterState::Errored(ref state) => state.for_each_server(f), } } } diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index 835c2c982..fe458da10 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -107,10 +107,7 @@ impl KeepAlive { } Err(e) => { warn!("keep alive failed: {e:?}"); - if let Err(err) = cluster_state.fetch_and_update().await { - warn!("fetch cluster failed: {err:?}"); - tokio::time::sleep(FAIL_SLEEP_DURATION).await; - } + cluster_state.errored(); } } diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 4ad11e052..cb38547e5 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -1,7 +1,8 @@ use std::{ + collections::BTreeSet, ops::SubAssign, sync::{atomic::AtomicU64, Arc}, - time::Duration, collections::BTreeSet, + time::Duration, }; use async_trait::async_trait; @@ -10,7 +11,7 @@ use parking_lot::RwLock; use tracing::{debug, warn}; use super::{ - cluster_state::{ClusterState, ClusterStateInit, ClusterStateFull}, + cluster_state::{ClusterState, ClusterStateFull, ClusterStateInit}, config::Config, connect::{ProposeResponse, RepeatableClientApi}, fetch::Fetch, @@ -19,7 +20,11 @@ use super::{ }; use crate::{ members::ServerId, - rpc::{CurpError, ReadState, Redirect, ProposeId, MembershipResponse, NodeMetadata, Node, Change, WaitLearnerResponse}, tracker::Tracker, + rpc::{ + Change, CurpError, 
MembershipResponse, Node, NodeMetadata, ProposeId, ReadState, Redirect, + WaitLearnerResponse, + }, + tracker::Tracker, }; /// Backoff config @@ -202,27 +207,21 @@ impl ClusterStateShared { } } - /// Fetch and updates current state - /// - /// Returns the fetched cluster state - pub(crate) async fn fetch_and_update(&self) -> Result { - let current = self.inner.read().clone(); - let (new_state, _) = self.fetch.fetch_cluster(current).await?; - *self.inner.write() = ClusterState::Full(new_state.clone()); - debug!("cluster state updates to: {new_state:?}"); - - Ok(new_state) - } - /// Retrieves the cluster state if it's ready, or fetches and updates it if not. pub(crate) async fn ready_or_fetch(&self) -> Result { let current = self.inner.read().clone(); match current { - ClusterState::Init(init) => self.fetch_and_update().await, + ClusterState::Init(_) | ClusterState::Errored(_) => self.fetch_and_update().await, ClusterState::Full(ready) => Ok(ready), } } + /// Marks the current state as errored by updating the inner state to `ClusterState::Errored`. + pub(crate) fn errored(&self) { + let mut inner_w = self.inner.write(); + *inner_w = ClusterState::Errored(Box::new(inner_w.clone())); + } + /// Updates the current state with the provided `ClusterStateReady`. 
pub(crate) fn update_with(&self, cluster_state: ClusterStateFull) { *self.inner.write() = ClusterState::Full(cluster_state); @@ -233,11 +232,22 @@ impl ClusterStateShared { pub(crate) fn unwrap_full_state(&self) -> ClusterStateFull { let current = self.inner.read().clone(); match current { - ClusterState::Init(_) => unreachable!("initial state"), + ClusterState::Init(_) | ClusterState::Errored(_) => unreachable!("initial state"), ClusterState::Full(ready) => ready, } } + /// Fetch and updates current state + /// + /// Returns the fetched cluster state + async fn fetch_and_update(&self) -> Result { + let current = self.inner.read().clone(); + let (new_state, _) = self.fetch.fetch_cluster(current).await?; + *self.inner.write() = ClusterState::Full(new_state.clone()); + debug!("cluster state updates to: {new_state:?}"); + + Ok(new_state) + } } /// The retry client automatically retry the requests of the inner client api @@ -319,7 +329,14 @@ where let propose_id_guard = self.tracker.gen_propose_id(client_id); let first_incomplete = self.tracker.first_incomplete(); while let Some(delay) = backoff.next_delay() { - let cluster_state = self.cluster_state.ready_or_fetch().await?; + let fetch_result = self.cluster_state.ready_or_fetch().await; + let cluster_state = match fetch_result { + Ok(x) => x, + Err(err) => { + self.on_error(err, delay, &mut last_err).await?; + continue; + } + }; let context = Context::new(*propose_id_guard, first_incomplete, cluster_state.clone()); let result = tokio::select! 
{ result = f(&self.inner, context) => result, @@ -327,21 +344,10 @@ where return Err(CurpError::expired_client_id().into()); }, }; - let err = match result { + match result { Ok(res) => return Ok(res), - Err(err) => err, + Err(err) => self.on_error(err, delay, &mut last_err).await?, }; - self.handle_err(&err, cluster_state).await?; - - #[cfg(feature = "client-metrics")] - super::metrics::get().client_retry_count.add(1, &[]); - - warn!( - "got error: {err:?}, retry on {} seconds later", - delay.as_secs_f32() - ); - last_err = Some(err); - tokio::time::sleep(delay).await; } Err(tonic::Status::deadline_exceeded(format!( @@ -350,12 +356,30 @@ where ))) } - /// Handles errors before another retry - async fn handle_err( + /// Actions performs on error + async fn on_error( &self, - err: &CurpError, - cluster_state: ClusterStateFull, + err: CurpError, + delay: Duration, + last_err: &mut Option, ) -> Result<(), tonic::Status> { + self.handle_err(&err)?; + + #[cfg(feature = "client-metrics")] + super::metrics::get().client_retry_count.add(1, &[]); + + warn!( + "got error: {err:?}, retry on {} seconds later", + delay.as_secs_f32() + ); + *last_err = Some(err); + tokio::time::sleep(delay).await; + + Ok(()) + } + + /// Handles errors before another retry + fn handle_err(&self, err: &CurpError) -> Result<(), tonic::Status> { match *err { // some errors that should not retry CurpError::Duplicated(()) @@ -363,7 +387,7 @@ where | CurpError::InvalidConfig(()) | CurpError::NodeNotExists(()) | CurpError::NodeAlreadyExists(()) - | CurpError::LearnerNotCatchUp(()) + | CurpError::LearnerNotCatchUp(()) | CurpError::InvalidMemberChange(()) => { return Err(tonic::Status::from(err.clone())); @@ -380,8 +404,7 @@ where | CurpError::WrongClusterVersion(()) | CurpError::Redirect(_) // FIXME: The redirect error needs to include full cluster state | CurpError::Zombie(()) => { - // TODO: Prevent concurrent updating cluster state - let _ignore = self.cluster_state.fetch_and_update().await?; + 
self.cluster_state.errored(); } } @@ -444,21 +467,21 @@ where /// know who the leader is.) /// /// Note: The fetched cluster may still be outdated if `linearizable` is false - async fn fetch_cluster( - &self, - linearizable: bool, - ) -> Result { + async fn fetch_cluster(&self, linearizable: bool) -> Result { self.retry::<_, _>(|client, ctx| async move { - let (_, resp) = self.fetch.fetch_cluster(ClusterState::Full(ctx.cluster_state())).await?; + let (_, resp) = self + .fetch + .fetch_cluster(ClusterState::Full(ctx.cluster_state())) + .await?; Ok(resp) }) .await } - /// Performs membership change async fn change_membership(&self, changes: Vec) -> Result<(), Self::Error> { - let resp = self.retry::<_, _>(|client, ctx| client.change_membership(changes.clone(), ctx)) + let resp = self + .retry::<_, _>(|client, ctx| client.change_membership(changes.clone(), ctx)) .await?; let cluster_state = Fetch::build_cluster_state_from_response(self.fetch.connect_to(), resp); self.cluster_state.update_with(cluster_state); @@ -470,11 +493,11 @@ where async fn wait_learner( &self, node_ids: BTreeSet, - ) -> Result> + Send>, Self::Error> { + ) -> Result> + Send>, Self::Error> + { self.retry::<_, _>(|client, ctx| client.wait_learner(node_ids.clone(), ctx)) .await } - } /// Tests for backoff From 6ba65be93e6983a917fbaf1fc2fbf90c6531dadf Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 09:05:17 +0800 Subject: [PATCH 266/322] fix --- crates/curp/src/server/raw_curp/member_impl.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 20f1f2254..a4b9b2de9 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -96,6 +96,7 @@ impl RawCurp { if matches!(st_w.role, Role::Learner) { st_w.role = Role::Follower; } + } else { st_w.role = Role::Learner; } From 
2debf5ccefc1595eb285f6275158e955226cd1ff Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 09:27:40 +0800 Subject: [PATCH 267/322] fix: do not abort unary request when client id expires The retry use tokio::select! to run both `f` and `wait_id_update`, if the client id has been updated, the `f` would be canceled and the client won't be able to get the response. Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/keep_alive.rs | 12 ++++++++++++ crates/curp/src/client/retry.rs | 9 ++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/keep_alive.rs index fe458da10..231b22436 100644 --- a/crates/curp/src/client/keep_alive.rs +++ b/crates/curp/src/client/keep_alive.rs @@ -41,6 +41,18 @@ pub(crate) struct KeepAliveHandle { } impl KeepAliveHandle { + /// Gets the client id + pub(crate) async fn client_id(&self) -> u64 { + loop { + let listen_update = self.update_event.listen(); + let latest = self.client_id.load(Ordering::Relaxed); + if latest != 0 { + return latest; + } + listen_update.await; + } + } + /// Wait for the client id pub(crate) async fn wait_id_update(&self, current_id: u64) -> u64 { loop { diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index cb38547e5..6a2f2e047 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -325,7 +325,7 @@ where { let mut backoff = self.retry_config.init_backoff(); let mut last_err = None; - let client_id = self.keep_alive.wait_id_update(0).await; + let client_id = self.keep_alive.client_id().await; let propose_id_guard = self.tracker.gen_propose_id(client_id); let first_incomplete = self.tracker.first_incomplete(); while let Some(delay) = backoff.next_delay() { @@ -338,12 +338,7 @@ where } }; let context = Context::new(*propose_id_guard, first_incomplete, cluster_state.clone()); - let result = 
tokio::select! { - result = f(&self.inner, context) => result, - _ = self.keep_alive.wait_id_update(client_id) => { - return Err(CurpError::expired_client_id().into()); - }, - }; + let result = f(&self.inner, context).await; match result { Ok(res) => return Ok(res), Err(err) => self.on_error(err, delay, &mut last_err).await?, From 5aafbb33cfc13ca8df489195620422ecd4f7abcc Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 09:53:46 +0800 Subject: [PATCH 268/322] fix: pass leader_id and term to build_membership_response The leader could removes itself on membership change committed, it must record the previous id and term and pass them to build_membership_response. Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/member_impl.rs | 13 +++++++------ crates/curp/src/server/curp_node/mod.rs | 5 ++++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index b38896ca7..afe4653aa 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -86,6 +86,7 @@ impl, RC: RoleChange> CurpNode { changes: impl IntoIterator, ) -> Result { self.ensure_leader()?; + let (self_id, term) = (self.curp.id(), self.curp.term()); let changes = Self::ensure_non_overlapping(changes)?; let configs = self.curp.generate_membership(changes); if configs.is_empty() { @@ -110,12 +111,15 @@ impl, RC: RoleChange> CurpNode { Self::abort_replication(); } - self.build_membership_response() + self.build_membership_response(self_id, term) } /// Builds a `ChangeMembershipResponse` from the given membership. 
- pub(crate) fn build_membership_response(&self) -> Result { - let (leader_id, term, _) = self.curp.leader(); + pub(crate) fn build_membership_response( + &self, + leader_id: u64, + term: u64, + ) -> Result { let Membership { members, nodes } = self.curp.effective_membership(); let members = members .into_iter() @@ -131,9 +135,6 @@ impl, RC: RoleChange> CurpNode { }) .collect(); - let leader_id = - leader_id.ok_or(CurpError::LeaderTransfer("no current leader".to_owned()))?; - Ok(MembershipResponse { members, nodes, diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 0cb7ea5d6..c670e5c27 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -365,7 +365,10 @@ impl, RC: RoleChange> CurpNode { &self, _req: FetchMembershipRequest, ) -> Result { - self.build_membership_response() + let (leader_id, term, _) = self.curp.leader(); + let leader_id = + leader_id.ok_or(CurpError::LeaderTransfer("no current leader".to_owned()))?; + self.build_membership_response(leader_id, term) } } From bb929262450b3bce7777e366e18e7ab3a866eb05 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:41:40 +0800 Subject: [PATCH 269/322] fix: only append to MembershipState when the index is greater Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 54d196660..e37c13ea4 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -165,7 +165,9 @@ impl MembershipState { /// Append a membership change entry pub(crate) fn append(&mut self, index: LogIndex, membership: Membership) { - self.entries.push(MembershipEntry::new(index, membership)); + if self.last().index < index { + self.entries.push(MembershipEntry::new(index, membership)); + } } /// Truncate at the give log index 
From ec059be792ba4bc3b3a85bb3ae15a65848ab1e98 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:27:21 +0800 Subject: [PATCH 270/322] refactor: remove self_next_index Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/replication.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/crates/curp/src/server/curp_node/replication.rs b/crates/curp/src/server/curp_node/replication.rs index c889d7dee..e6d192e2c 100644 --- a/crates/curp/src/server/curp_node/replication.rs +++ b/crates/curp/src/server/curp_node/replication.rs @@ -3,7 +3,6 @@ use std::{collections::BTreeMap, sync::Arc, time::Duration}; use curp_external_api::{ cmd::{Command, CommandExecutor}, role_change::RoleChange, - LogIndex, }; use parking_lot::Mutex; use tokio::{sync::oneshot, task::JoinHandle, time::MissedTickBehavior}; @@ -55,7 +54,6 @@ impl, RC: RoleChange> CurpNode { .copied() .zip(node_states.values().map(NodeState::connect).cloned()) .collect(); - let self_next_index = curp.last_log_index() + 1; let (action_tx, action_rx) = flume::bounded(ACTION_CHANNEL_SIZE); Self::abort_replication(); @@ -77,7 +75,6 @@ impl, RC: RoleChange> CurpNode { self_id, self_term, cfg, - self_next_index, )) }); *HANDLES.lock() = replication_handles @@ -165,19 +162,12 @@ impl, RC: RoleChange> CurpNode { self_id: u64, self_term: u64, cfg: CurpConfig, - self_next_index: LogIndex, ) { let rpc_timeout = cfg.rpc_timeout; let batch_timeout = cfg.batch_timeout; let connect = node_state.connect(); let sync_event = node_state.sync_event(); let mut next_index = node_state.next_index(); - // The next_index could be zero if a new leader is elected and it does not have the - // infomations of other nodes. We set the initial index to the next index of the - // current node. 
- if next_index == 0 { - next_index = self_next_index; - } loop { let _ignore = tokio::time::timeout(batch_timeout, sync_event.listen()).await; From 87b871998b9df7ae96da4da632e9c5731c7b7eb5 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:46:53 +0800 Subject: [PATCH 271/322] fix: update next index to last_log_index instead `last_log_index` points to the non-op log Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 69f809449..ab89083d1 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -997,7 +997,7 @@ impl RawCurp { for other in peers { self.ctx .node_states - .update_next_index(other, last_log_index + 1); // iter from the end to front is more likely to match the follower + .update_next_index(other, last_log_index); // iter from the end to front is more likely to match the follower } if prev_last_log_index < last_log_index { // if some entries are recovered, sync with followers immediately From 6ac1d460bb8b86c89368ba0630f31c61c60c7771 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:37:01 +0800 Subject: [PATCH 272/322] fix: prevent fetch membership from learner Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 3 +++ crates/curp/tests/it/server.rs | 24 ++---------------------- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index c670e5c27..3dd29d957 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -365,6 +365,9 @@ impl, RC: RoleChange> CurpNode { &self, _req: FetchMembershipRequest, ) -> Result { + 
if self.curp.is_learner() { + return Err(CurpError::learner_not_catch_up()); + } let (leader_id, term, _) = self.curp.leader(); let leader_id = leader_id.ok_or(CurpError::LeaderTransfer("no current leader".to_owned()))?; diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index 1f846308b..eaa1dc048 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -23,9 +23,7 @@ use tokio_stream::StreamExt; use tracing_test::traced_test; use utils::config::ClientConfig; -use crate::common::curp_group::{ - commandpb::FetchMembershipRequest, CurpGroup, DEFAULT_SHUTDOWN_TIMEOUT, -}; +use crate::common::curp_group::{CurpGroup, DEFAULT_SHUTDOWN_TIMEOUT}; #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] @@ -596,25 +594,7 @@ async fn new_node_should_apply_old_cluster_logs() { sleep_millis(500).await; // wait for membership sync - // 2. fetch and check cluster from new node - let mut new_connect = group.get_connect(&node_id).await; - let res = new_connect - .fetch_membership(FetchMembershipRequest {}) - .await - .unwrap() - .into_inner(); - assert_eq!(res.nodes.len(), 4); - assert!(res - .nodes - .iter() - .any(|m| m.node_id == node_id && m.meta.as_ref().unwrap().name == "new_node")); - assert!(!res - .members - .iter() - .flat_map(|s| &s.set) - .any(|m| *m == node_id)); - - // 3. check if the new node syncs the command from old cluster + // 2. 
check if the new node syncs the command from old cluster let new_node = group.nodes.get_mut(&node_id).unwrap(); let (cmd, _) = new_node.as_rx.recv().await.unwrap(); assert_eq!( From 79ac278f3b007fe0fc942ee10dc645076c3d7862 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:43:59 +0800 Subject: [PATCH 273/322] fix: add_node_should_add_new_node_to_curp should truncate index Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 0fa7892a0..c9187d185 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -637,7 +637,7 @@ fn add_node_should_add_new_node_to_curp() { .iter() .flatten() .any(|id| *id == 3)); - let _ignore = curp.update_membership_state(None, Some((1, original_membership)), None); + let _ignore = curp.update_membership_state(Some(1), Some((1, original_membership)), None); assert!(!curp .effective_membership() .members From 05e7eda4c13572efde40b1183f1f85e05b8a6e6a Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:11:38 +0800 Subject: [PATCH 274/322] fix: try_become_leader_now should also call respawn_replication Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 3dd29d957..2a9e70cb4 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -601,7 +601,10 @@ impl, RC: RoleChange> CurpNode { _req: &TryBecomeLeaderNowRequest, ) -> Result { if let Some(vote) = self.curp.handle_try_become_leader_now() { - _ = Self::bcast_vote(self.curp.as_ref(), 
vote).await; + let result = Self::bcast_vote(self.curp.as_ref(), vote).await; + if matches!(result, BCastVoteResult::VoteSuccess) { + Self::respawn_replication(Arc::clone(&self.curp)); + } } Ok(TryBecomeLeaderNowResponse::default()) } From 391efcaf94085c6041221e16a2a381e2f4b339fb Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:12:42 +0800 Subject: [PATCH 275/322] fix: state_machine_worker should use cached term instead Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/replication.rs | 6 ++++-- .../curp/src/server/raw_curp/replication.rs | 21 +++++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/crates/curp/src/server/curp_node/replication.rs b/crates/curp/src/server/curp_node/replication.rs index e6d192e2c..d8ad78815 100644 --- a/crates/curp/src/server/curp_node/replication.rs +++ b/crates/curp/src/server/curp_node/replication.rs @@ -57,7 +57,7 @@ impl, RC: RoleChange> CurpNode { let (action_tx, action_rx) = flume::bounded(ACTION_CHANNEL_SIZE); Self::abort_replication(); - let state_handle = tokio::spawn(Self::state_machine_worker(curp, action_rx)); + let state_handle = tokio::spawn(Self::state_machine_worker(curp, action_rx, self_term)); let heartbeat_handle = tokio::spawn(Self::heartbeat_worker( action_tx.clone(), connects, @@ -92,11 +92,13 @@ impl, RC: RoleChange> CurpNode { async fn state_machine_worker( curp: Arc>, action_rx: flume::Receiver>, + // NOTE: `self_term` might differ from `curp.term()` due to external updates to curp + self_term: u64, ) { // As we spawn the workers on every leader update, the term remains consistent while let Ok(action) = action_rx.recv_async().await { let exit = matches!(action, Action::StepDown(_)); - curp.sync_state_machine(action); + curp.sync_state_machine(self_term, action); if exit { break; } diff --git a/crates/curp/src/server/raw_curp/replication.rs 
b/crates/curp/src/server/raw_curp/replication.rs index 351ef50d5..078d876b0 100644 --- a/crates/curp/src/server/raw_curp/replication.rs +++ b/crates/curp/src/server/raw_curp/replication.rs @@ -25,8 +25,7 @@ pub(crate) enum Action { impl RawCurp { /// Synchronizes a action - pub(crate) fn sync_state_machine(&self, action: Action) { - let self_term = self.term(); + pub(crate) fn sync_state_machine(&self, self_term: u64, action: Action) { match action { Action::UpdateMatchIndex((node_id, index)) => { debug!("updating {node_id}'s match index to {index}"); @@ -45,7 +44,11 @@ impl RawCurp { } } Action::StepDown(node_term) => { - debug_assert!(node_term > self_term, "node_term no greater than self_term"); + debug_assert!( + node_term > self_term, + "node_term {node_term} no greater than self_term {self_term}, id: {}", + self.id() + ); info!("received greater term: {node_term}, stepping down."); self.step_down(node_term); } @@ -120,7 +123,7 @@ mod test { entries: vec![], }; let action = TestRawCurp::append_entries_action(2, false, 1, &ae, 2, 1); - curp.sync_state_machine(action); + curp.sync_state_machine(1, action); let st_r = curp.st.read(); assert_eq!(st_r.term, 2); @@ -132,7 +135,7 @@ mod test { fn heartbeat_will_calibrate_term() { let curp = RawCurp::new_test(3, mock_role_change(), Arc::new(TaskManager::new())); let action = TestRawCurp::heartbeat_action(2, 1).unwrap(); - curp.sync_state_machine(action); + curp.sync_state_machine(1, action); let st_r = curp.st.read(); assert_eq!(st_r.term, 2); @@ -144,7 +147,7 @@ mod test { fn snapshot_will_calibrate_term() { let curp = RawCurp::new_test(3, mock_role_change(), Arc::new(TaskManager::new())); let action = TestRawCurp::snapshot_action(2, 1, 1, 1); - curp.sync_state_machine(action); + curp.sync_state_machine(1, action); let st_r = curp.st.read(); assert_eq!(st_r.term, 2); @@ -160,7 +163,7 @@ mod test { assert_eq!(curp.ctx.node_states.get_match_index(s1_id), Some(0)); let action = TestRawCurp::snapshot_action(1, s1_id, 1, 
1); - curp.sync_state_machine(action); + curp.sync_state_machine(1, action); let st_r = curp.st.read(); assert_eq!(st_r.term, 1); @@ -184,7 +187,7 @@ mod test { entries: vec![], }; let action = TestRawCurp::append_entries_action(1, false, 2, &ae, s1_id, 1); - curp.sync_state_machine(action); + curp.sync_state_machine(1, action); let st_r = curp.st.read(); assert_eq!(st_r.term, 1); @@ -208,7 +211,7 @@ mod test { entries: vec![], }; let action = TestRawCurp::append_entries_action(1, true, 2, &ae, s1_id, 1); - curp.sync_state_machine(action); + curp.sync_state_machine(1, action); let st_r = curp.st.read(); assert_eq!(st_r.term, 1); From 530fa9f878119c8180b033c9d3e2d3af0c1c68c8 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:49:36 +0800 Subject: [PATCH 276/322] fix: Action::UpdateMatchIndex should also update next_index Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/replication.rs | 7 +++++-- crates/curp/src/server/raw_curp/node_state.rs | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/crates/curp/src/server/curp_node/replication.rs b/crates/curp/src/server/curp_node/replication.rs index d8ad78815..a70389520 100644 --- a/crates/curp/src/server/curp_node/replication.rs +++ b/crates/curp/src/server/curp_node/replication.rs @@ -156,6 +156,7 @@ impl, RC: RoleChange> CurpNode { .flatten() } + #[allow(clippy::arithmetic_side_effects)] // a log index(u64) should never overflow /// A worker responsible for appending log entries to other nodes in the cluster async fn replication_worker( node_id: u64, @@ -206,8 +207,10 @@ impl, RC: RoleChange> CurpNode { }; if let Some(action) = action { - if let Action::UpdateNextIndex((_, index)) = action { - next_index = index; + match action { + Action::UpdateMatchIndex((_, index)) => next_index = index + 1, + Action::UpdateNextIndex((_, index)) => next_index = index, + Action::GetLogFrom(_) | 
Action::StepDown(_) => {} } let __ignore = action_tx.send(action); } diff --git a/crates/curp/src/server/raw_curp/node_state.rs b/crates/curp/src/server/raw_curp/node_state.rs index 3c7451b5d..8a55c4627 100644 --- a/crates/curp/src/server/raw_curp/node_state.rs +++ b/crates/curp/src/server/raw_curp/node_state.rs @@ -84,7 +84,10 @@ impl NodeStates { } status.match_index = index; status.next_index = index + 1; - debug!("follower {id}'s match_index updated to {index}"); + debug!( + "follower {id}'s match_index updated to {}, next_index updated to {}", + status.match_index, status.next_index + ); }); if opt.is_none() { warn!("follower {} is not found, it maybe has been removed", id); From 2fd19cdf020fd5c5dff041e128fcefa7f9fe9910 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:50:42 +0800 Subject: [PATCH 277/322] refactor: remove SyncFollower from task manager Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/utils/src/task_manager/mod.rs | 76 ++------------------------ crates/utils/src/task_manager/tasks.rs | 2 - 2 files changed, 4 insertions(+), 74 deletions(-) diff --git a/crates/utils/src/task_manager/mod.rs b/crates/utils/src/task_manager/mod.rs index 255c0f2c9..ae4c445dd 100644 --- a/crates/utils/src/task_manager/mod.rs +++ b/crates/utils/src/task_manager/mod.rs @@ -33,8 +33,6 @@ pub struct TaskManager { pub struct ClusterShutdownTracker { /// Cluster shutdown notify notify: Notify, - /// Count of sync follower tasks. 
- sync_follower_task_count: AtomicU8, /// Shutdown Applied leader_notified: AtomicBool, } @@ -46,32 +44,10 @@ impl ClusterShutdownTracker { pub fn new() -> Self { Self { notify: Notify::new(), - sync_follower_task_count: AtomicU8::new(0), leader_notified: AtomicBool::new(false), } } - /// Sync follower task count inc - #[inline] - pub fn sync_follower_task_count_inc(&self) { - let n = self - .sync_follower_task_count - .fetch_add(1, Ordering::Relaxed); - debug!("sync follower task count inc to: {}", n.overflow_add(1)); - } - - /// Sync follower task count dec - #[inline] - pub fn sync_follower_task_count_dec(&self) { - let c = self - .sync_follower_task_count - .fetch_sub(1, Ordering::Relaxed); - if c == 1 { - self.notify.notify_one(); - } - debug!("sync follower task count dec to: {}", c.overflow_sub(1)); - } - /// Mark leader notified #[inline] pub fn mark_leader_notified(&self) { @@ -82,9 +58,7 @@ impl ClusterShutdownTracker { /// Check if the cluster shutdown condition is met fn check(&self) -> bool { - let sync_follower_task_count = self.sync_follower_task_count.load(Ordering::Relaxed); - let leader_notified = self.leader_notified.load(Ordering::Relaxed); - sync_follower_task_count == 0 && leader_notified + self.leader_notified.load(Ordering::Relaxed) } } @@ -144,7 +118,6 @@ impl TaskManager { Some(Listener::new( Arc::clone(&self.state), Arc::clone(&task.notifier), - Arc::clone(&self.cluster_shutdown_tracker), )) } @@ -163,11 +136,7 @@ impl TaskManager { .tasks .get_mut(&name) .unwrap_or_else(|| unreachable!("task {:?} should exist", name)); - let listener = Listener::new( - Arc::clone(&self.state), - Arc::clone(&task.notifier), - Arc::clone(&self.cluster_shutdown_tracker), - ); + let listener = Listener::new(Arc::clone(&self.state), Arc::clone(&task.notifier)); let handle = tokio::spawn(f(listener)); task.handle.push(handle); } @@ -234,9 +203,6 @@ impl TaskManager { self.state.store(2, Ordering::Release); let _ig = tokio::spawn(async move { info!("cluster 
shutdown start"); - _ = tasks - .get(&TaskName::SyncFollower) - .map(|n| n.notifier.notify_waiters()); loop { if tracker.check() { break; @@ -340,22 +306,12 @@ pub struct Listener { notify: Arc, /// State of task manager state: Arc, - /// Cluster shutdown tracker - cluster_shutdown_tracker: Arc, } impl Listener { /// Create a new `Listener` - fn new( - state: Arc, - notify: Arc, - cluster_shutdown_tracker: Arc, - ) -> Self { - Self { - notify, - state, - cluster_shutdown_tracker, - } + fn new(state: Arc, notify: Arc) -> Self { + Self { notify, state } } /// Get current state @@ -397,30 +353,6 @@ impl Listener { let state = self.state(); matches!(state, State::Shutdown) } - - /// Get a sync follower guard - #[must_use] - #[inline] - pub fn sync_follower_guard(&self) -> SyncFollowerGuard { - self.cluster_shutdown_tracker.sync_follower_task_count_inc(); - SyncFollowerGuard { - tracker: Arc::clone(&self.cluster_shutdown_tracker), - } - } -} - -/// Sync follower guard, used to track sync follower task count -#[derive(Debug)] -pub struct SyncFollowerGuard { - /// Cluster shutdown tracker - tracker: Arc, -} - -impl Drop for SyncFollowerGuard { - #[inline] - fn drop(&mut self) { - self.tracker.sync_follower_task_count_dec(); - } } #[cfg(test)] diff --git a/crates/utils/src/task_manager/tasks.rs b/crates/utils/src/task_manager/tasks.rs index e32606b00..60893dd32 100644 --- a/crates/utils/src/task_manager/tasks.rs +++ b/crates/utils/src/task_manager/tasks.rs @@ -42,7 +42,6 @@ enum_with_iter! 
{ LeaseKeepAlive, TonicServer, Election, - SyncFollower, ConfChange, GcClientLease, RevokeExpiredLeases, @@ -63,7 +62,6 @@ impl TaskName { | TaskName::LeaseKeepAlive | TaskName::TonicServer | TaskName::Election - | TaskName::SyncFollower | TaskName::ConfChange | TaskName::GcClientLease | TaskName::RevokeExpiredLeases From 79892af7c832c0b9a7e4a20b1c2911b3360903ef Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:54:23 +0800 Subject: [PATCH 278/322] chore: remove logging Action::GetLogFrom Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/replication.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/curp/src/server/raw_curp/replication.rs b/crates/curp/src/server/raw_curp/replication.rs index 078d876b0..466b75cbb 100644 --- a/crates/curp/src/server/raw_curp/replication.rs +++ b/crates/curp/src/server/raw_curp/replication.rs @@ -37,7 +37,6 @@ impl RawCurp { self.update_next_index(node_id, index); } Action::GetLogFrom((next, tx)) => { - debug!("getting log from index {next}"); let sync = self.sync_from(next); if tx.send(sync).is_err() { error!("send append entries failed"); From 0fb6aab9dfc8a0ce5a56b68b72a03453b65db9f0 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 21 Oct 2024 15:16:33 +0800 Subject: [PATCH 279/322] refactor: heartbeat will also send commit index to calibrate nodes Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/replication.rs | 30 +++++++++++-------- crates/curp/src/server/raw_curp/mod.rs | 12 ++++++-- .../curp/src/server/raw_curp/replication.rs | 9 ++++++ 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/crates/curp/src/server/curp_node/replication.rs b/crates/curp/src/server/curp_node/replication.rs index a70389520..2dac639c7 100644 --- a/crates/curp/src/server/curp_node/replication.rs +++ 
b/crates/curp/src/server/curp_node/replication.rs @@ -4,6 +4,7 @@ use curp_external_api::{ cmd::{Command, CommandExecutor}, role_change::RoleChange, }; +use futures::FutureExt; use parking_lot::Mutex; use tokio::{sync::oneshot, task::JoinHandle, time::MissedTickBehavior}; use tonic::Response; @@ -58,13 +59,10 @@ impl, RC: RoleChange> CurpNode { Self::abort_replication(); let state_handle = tokio::spawn(Self::state_machine_worker(curp, action_rx, self_term)); - let heartbeat_handle = tokio::spawn(Self::heartbeat_worker( - action_tx.clone(), - connects, - cfg.clone(), - self_id, - self_term, - )); + let heartbeat_handle = tokio::spawn( + Self::heartbeat_worker(action_tx.clone(), connects, cfg.clone(), self_id, self_term) + .map(|result| info!("heartbeat worker exit, result: {result:?}")), + ); let replication_handles = node_states.into_iter().map(|(id, state)| { let cfg = cfg.clone(); info!("spawning replication task for {id}"); @@ -114,21 +112,29 @@ impl, RC: RoleChange> CurpNode { cfg: CurpConfig, self_id: u64, self_term: u64, - ) { + ) -> Result<(), Box> { let timeout = cfg.rpc_timeout; let mut ticker = tokio::time::interval(cfg.heartbeat_interval); ticker.set_missed_tick_behavior(MissedTickBehavior::Delay); - let heartbeat = Heartbeat::new(self_term, self_id); + loop { let _inst = ticker.tick().await; + let (tx, rx) = oneshot::channel(); + action_tx.send(Action::GetCommitIndex(tx))?; + let commit_index = rx.await?; + let heartbeat = Heartbeat::new(self_term, self_id, commit_index); + for (id, connect) in &connects { if let Some(action) = Self::send_heartbeat(*id, connect, heartbeat, self_term, timeout).await { + debug_assert!( + matches!(action, Action::StepDown(_)), + "action not Action::StepDown" + ); // step down let _ignore = action_tx.send(action); - info!("heartbeat worker exiting"); - return; + return Ok(()); } } } @@ -210,7 +216,7 @@ impl, RC: RoleChange> CurpNode { match action { Action::UpdateMatchIndex((_, index)) => next_index = index + 1, 
Action::UpdateNextIndex((_, index)) => next_index = index, - Action::GetLogFrom(_) | Action::StepDown(_) => {} + Action::GetLogFrom(_) | Action::StepDown(_) | Action::GetCommitIndex(_) => {} } let __ignore = action_tx.send(action); } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index ab89083d1..9074c3cd1 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -290,12 +290,18 @@ pub(super) struct Heartbeat { term: u64, /// Leader's id leader_id: ServerId, + /// Leader's commit index + leader_commit: LogIndex, } impl Heartbeat { /// Creates a new `Heartbeat` - pub(super) fn new(term: u64, leader_id: ServerId) -> Self { - Self { term, leader_id } + pub(super) fn new(term: u64, leader_id: ServerId, leader_commit: LogIndex) -> Self { + Self { + term, + leader_id, + leader_commit, + } } } @@ -304,10 +310,10 @@ impl From for crate::rpc::AppendEntriesRequest { Self { term: hb.term, leader_id: hb.leader_id, + leader_commit: hb.leader_commit, // not used for a heartbeat prev_log_index: 0, prev_log_term: 0, - leader_commit: 0, entries: vec![], } } diff --git a/crates/curp/src/server/raw_curp/replication.rs b/crates/curp/src/server/raw_curp/replication.rs index 466b75cbb..7bd0e1992 100644 --- a/crates/curp/src/server/raw_curp/replication.rs +++ b/crates/curp/src/server/raw_curp/replication.rs @@ -18,6 +18,10 @@ pub(crate) enum Action { /// Contains a tuple with the starting log index and a sender to send the sync action. GetLogFrom((LogIndex, oneshot::Sender>)), + /// Request to get the commit index. + /// Contains a sender to send the commit index. + GetCommitIndex(oneshot::Sender), + /// Step down the current node. /// Contains the latest term. 
StepDown(u64), @@ -42,6 +46,11 @@ impl RawCurp { error!("send append entries failed"); } } + Action::GetCommitIndex(tx) => { + if tx.send(self.commit_index()).is_err() { + error!("send commit index failed"); + } + } Action::StepDown(node_term) => { debug_assert!( node_term > self_term, From 98384c879f947e36229143d9289528b1087f854f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 21 Oct 2024 16:44:13 +0800 Subject: [PATCH 280/322] feat: implement wait shutdown in CurpNode Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 38 ++++++++++++++++++- crates/curp/src/server/raw_curp/node_state.rs | 5 +++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 2a9e70cb4..5d412b33c 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -1,5 +1,5 @@ use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, fmt::Debug, sync::Arc, time::{Duration, Instant}, @@ -7,7 +7,7 @@ use std::{ use clippy_utilities::NumericCast; use engine::{SnapshotAllocator, SnapshotApi}; -use futures::{pin_mut, stream::FuturesUnordered, FutureExt, Stream, StreamExt}; +use futures::{future::join_all, pin_mut, stream::FuturesUnordered, FutureExt, Stream, StreamExt}; use madsim::rand::{thread_rng, Rng}; use opentelemetry::KeyValue; use parking_lot::{Mutex, RwLock}; @@ -326,9 +326,43 @@ impl, RC: RoleChange> CurpNode { } self.curp.handle_shutdown(req.propose_id())?; CommandBoard::wait_for_shutdown_synced(&self.cmd_board).await; + self.trigger_nodes_shutdown().await; + Self::abort_replication(); Ok(ShutdownResponse::default()) } + #[allow(clippy::arithmetic_side_effects, clippy::pattern_type_mismatch)] // won't overflow + /// Trigger other nodes to shutdown + async fn trigger_nodes_shutdown(&self) { + /// Wait interval for trigger shutdown + const 
TRIGGER_INTERVAL: Duration = Duration::from_millis(100); + let mut notified = HashSet::::new(); + let commit_index = self.curp.commit_index(); + loop { + let states = self.curp.all_node_states(); + if notified.len() + 1 == states.len() { + break; + } + let futs: FuturesUnordered<_> = states + .iter() + .filter(|(id, _)| !notified.contains(id)) + .filter(|(_, state)| state.match_index() == commit_index) + .map(|(id, state)| state.connect().trigger_shutdown().map(move |res| (id, res))) + .collect(); + for (id, result) in join_all(futs).await { + match result { + Ok(()) => { + info!("node {id} shutdown triggered"); + let _ignore = notified.insert(*id); + } + Err(err) => warn!("send trigger shutdown rpc to {id} failed, err: {err}"), + } + } + + tokio::time::sleep(TRIGGER_INTERVAL).await; + } + } + /// Handle lease keep alive requests pub(super) async fn lease_keep_alive( &self, diff --git a/crates/curp/src/server/raw_curp/node_state.rs b/crates/curp/src/server/raw_curp/node_state.rs index 8a55c4627..a6ae4d45a 100644 --- a/crates/curp/src/server/raw_curp/node_state.rs +++ b/crates/curp/src/server/raw_curp/node_state.rs @@ -198,6 +198,11 @@ impl NodeState { self.status.next_index } + /// Get the match index of the current node + pub(crate) fn match_index(&self) -> LogIndex { + self.status.match_index + } + /// Get the connection to the node pub(crate) fn connect(&self) -> &InnerConnectApiWrapper { &self.connect From 08cdb21d67ea64d1ee0bfdfb7f034b7c7de5c899 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 21 Oct 2024 20:32:19 +0800 Subject: [PATCH 281/322] refactor: seperate handle append entries and handle heartbeat on followers Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 21 ++++++ crates/curp/src/server/raw_curp/mod.rs | 86 +++++++++++++++---------- 2 files changed, 74 insertions(+), 33 deletions(-) diff --git a/crates/curp/src/server/curp_node/mod.rs 
b/crates/curp/src/server/curp_node/mod.rs index 5d412b33c..d232ea78f 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -422,6 +422,11 @@ impl, RC: RoleChange> CurpNode { let prev_log_index = req.prev_log_index; let prev_log_term = req.prev_log_term; let leader_commit = req.leader_commit; + + if entries.is_empty() { + return Ok(self.heartbeat(leader_id, term, leader_commit)); + } + self.append_entries_inner( entries, leader_id, @@ -432,6 +437,22 @@ impl, RC: RoleChange> CurpNode { ) } + /// Handles heartbeat + fn heartbeat( + &self, + leader_id: u64, + req_term: u64, + leader_commit: u64, + ) -> AppendEntriesResponse { + match self + .curp + .handle_heartbeat(req_term, leader_id, leader_commit) + { + Ok(()) => AppendEntriesResponse::new_accept(req_term), + Err((term, hint)) => AppendEntriesResponse::new_reject(term, hint), + } + } + /// Handle `AppendEntries` requests pub(super) fn append_entries_inner( &self, diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 9074c3cd1..a899e0a22 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -39,7 +39,6 @@ use tracing::debug; use tracing::error; use tracing::log::log_enabled; use tracing::log::Level; -use tracing::trace; use utils::barrier::IdBarrier; use utils::config::CurpConfig; use utils::parking_lot_lock::MutexMap; @@ -759,53 +758,74 @@ impl RawCurp { entries: Vec>, leader_commit: LogIndex, ) -> Result, AppendEntriesFailure> { - if entries.is_empty() { - trace!( - "{} received heartbeat from {}: term({}), commit({}), prev_log_index({}), prev_log_term({})", - self.id(), leader_id, term, leader_commit, prev_log_index, prev_log_term - ); - } else { - debug!( + debug!( "{} received append_entries from {}: term({}), commit({}), prev_log_index({}), prev_log_term({}), {} entries", self.id(), leader_id, term, leader_commit, prev_log_index, prev_log_term, entries.len() ); - } - // 
validate term and set leader id - { - let st_r = self.st.upgradable_read(); - match st_r.term.cmp(&term) { - std::cmp::Ordering::Less => { - let mut st_w = RwLockUpgradableReadGuard::upgrade(st_r); - self.update_to_term_and_become_follower(&mut st_w, term); - st_w.leader_id = Some(leader_id); - } - std::cmp::Ordering::Equal => { - if st_r.leader_id.is_none() { - let mut st_w = RwLockUpgradableReadGuard::upgrade(st_r); - st_w.leader_id = Some(leader_id); - } - } - std::cmp::Ordering::Greater => { - return Err((st_r.term, self.log.read().commit_index + 1)) - } - } - } + self.validates_term(term, leader_id)?; self.reset_election_tick(); - // append log entries let mut log_w = self.log.write(); let (to_persist, truncate_at) = log_w .try_append_entries(entries, prev_log_index, prev_log_term) .map_err(|_ig| (term, log_w.commit_index + 1))?; - // update commit index + self.update_commit(&mut log_w, leader_commit); + + Ok((term, truncate_at, to_persist)) + } + + /// Handles heartbeat + pub(super) fn handle_heartbeat( + &self, + term: u64, + leader_id: ServerId, + leader_commit: LogIndex, + ) -> Result<(), AppendEntriesFailure> { + debug!( + "{} received heartbeat from {}: term({}), commit({}) ", + self.id(), + leader_id, + term, + leader_commit, + ); + self.validates_term(term, leader_id)?; + self.reset_election_tick(); + self.update_commit(&mut self.log.write(), leader_commit); + + Ok(()) + } + + /// Validates term and set leader id + fn validates_term(&self, term: u64, leader_id: u64) -> Result<(), (u64, u64)> { + let st_r = self.st.upgradable_read(); + match st_r.term.cmp(&term) { + std::cmp::Ordering::Less => { + let mut st_w = RwLockUpgradableReadGuard::upgrade(st_r); + self.update_to_term_and_become_follower(&mut st_w, term); + st_w.leader_id = Some(leader_id); + } + std::cmp::Ordering::Equal => { + if st_r.leader_id.is_none() { + let mut st_w = RwLockUpgradableReadGuard::upgrade(st_r); + st_w.leader_id = Some(leader_id); + } + } + std::cmp::Ordering::Greater => { + 
return Err((st_r.term, self.log.read().commit_index + 1)) + } + } + + Ok(()) + } + + /// Updates commit index + fn update_commit(&self, log_w: &mut Log, leader_commit: LogIndex) { let prev_commit_index = log_w.commit_index; log_w.commit_index = min(leader_commit, log_w.last_log_index()); if prev_commit_index < log_w.commit_index { self.apply(&mut *log_w); } - - Ok((term, truncate_at, to_persist)) } /// Check if `commit_index` needs to be updated From 70ff862324153209893d7fb4e8bf6b5ed4ca42c9 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 21 Oct 2024 21:24:01 +0800 Subject: [PATCH 282/322] test: refactor test shutdown_rpc_should_shutdown_the_cluster Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/tests/it/common/curp_group.rs | 6 +--- crates/curp/tests/it/server.rs | 43 ++++++++++++----------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index 3ef02ac99..5907c1f35 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -391,11 +391,7 @@ impl CurpGroup { .flat_map(|node| { BOTTOM_TASKS .iter() - .map(|task| { - node.task_manager - .get_shutdown_listener(task.to_owned()) - .unwrap() - }) + .flat_map(|task| node.task_manager.get_shutdown_listener(task.to_owned())) .collect::>() }) .collect::>(); diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index eaa1dc048..c30536db0 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -9,13 +9,13 @@ use clippy_utilities::NumericCast; use curp::{ client::{ClientApi, ClientBuilder}, member::MembershipInfo, - rpc::{Change, CurpError, MembershipResponse, Node, NodeMetadata}, + rpc::{Change, MembershipResponse, Node, NodeMetadata}, }; use curp_test_utils::{ init_logger, sleep_millis, test_cmd::{TestCommand, TestCommandResult, TestCommandType}, }; -use 
futures::stream::FuturesUnordered; +use futures::{future::join_all, stream::FuturesUnordered, FutureExt}; use madsim::rand::{thread_rng, Rng}; use test_macros::abort_on_panic; use tokio::net::TcpListener; @@ -299,29 +299,32 @@ async fn shutdown_rpc_should_shutdown_the_cluster() { let req_client = group.new_client().await; let collection_task = tokio::spawn(async move { - let mut collection = vec![]; - for i in 0..10 { - let cmd = TestCommand::new_put(vec![i], i); - let res = req_client.propose(&cmd, None, true).await; - if res.is_ok() && res.unwrap().is_ok() { - collection.push(i); - } - } - collection + let cmds: Vec<_> = (0..10).map(|i| TestCommand::new_put(vec![i], i)).collect(); + let futs: FuturesUnordered<_> = (0..10) + .zip(&cmds) + .map(|(i, cmd)| { + req_client + .propose(cmd, None, true) + .map(move |res| res.map(|_| i)) + }) + .collect(); + + join_all(futs) + .await + .into_iter() + .filter_map(Result::ok) + .collect::>() }); let client = group.new_client().await; client.propose_shutdown().await.unwrap(); - let res = client - .propose(&TestCommand::new_put(vec![888], 1), None, false) - .await; - assert!(matches!( - CurpError::from(res.unwrap_err()), - CurpError::ShuttingDown(_) - )); - - let collection = collection_task.await.unwrap(); + let collection = tokio::time::timeout(Duration::from_secs(2), collection_task) + .await + .map(Result::ok) + .ok() + .flatten() + .unwrap_or_else(Vec::new); group .wait_for_group_shutdown(DEFAULT_SHUTDOWN_TIMEOUT) .await; From bb028cc5babb3f713d77fdef66bbab28a1f1f395 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 21 Oct 2024 22:03:56 +0800 Subject: [PATCH 283/322] fix: update_states_with_membership will always connect to all nodes for consistency Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/member_impl.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs 
b/crates/curp/src/server/curp_node/member_impl.rs index afe4653aa..7a7c6043d 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -184,7 +184,7 @@ impl, RC: RoleChange> CurpNode { impl, RC: RoleChange> CurpNode { /// Updates the membership config pub(crate) fn update_states_with_membership(&self, membership: &Membership) { - let connects = self.connect_other_nodes(membership); + let connects = self.connect_nodes(membership); let _new_states = self.curp.update_node_states(connects); self.curp.update_role(membership); } @@ -209,15 +209,14 @@ impl, RC: RoleChange> CurpNode { /// Establishes connections to all nodes specified in the membership configuration, /// excluding the current node. - pub(crate) fn connect_other_nodes( + pub(crate) fn connect_nodes( &self, config: &Membership, ) -> BTreeMap { - let self_id = self.curp.id(); let nodes = config .nodes .iter() - .filter_map(|(id, meta)| (*id != self_id).then_some((*id, meta.peer_urls().to_vec()))) + .map(|(id, meta)| (*id, meta.peer_urls().to_vec())) .collect(); inner_connects(nodes, self.curp.client_tls_config()).collect() From 9191651255afe1cf97a16e2cb51ba6923a70a0de Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 21 Oct 2024 22:06:52 +0800 Subject: [PATCH 284/322] test: do not reconstruct MemebershipInfo in curp_group Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/tests/it/common/curp_group.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index 5907c1f35..5cad34648 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -281,9 +281,6 @@ impl CurpGroup { let role_change_arc = role_change_cb.get_inner_arc(); let curp_storage = Arc::new(DB::open(&config.engine_cfg).unwrap()); - let init_members = membership_info.init_members; - let node_id 
= init_members.len(); - let membership_info = MembershipInfo::new(node_id as u64, init_members); let server = Arc::new(Rpc::new( membership_info, false, From 5df50f152615519f77c5e7fe41601dc2e130c338 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 21 Oct 2024 22:14:51 +0800 Subject: [PATCH 285/322] fix: mark election task as cancel safe This prevents the follower tries to starts election when cluster already shutdowned. Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/utils/src/task_manager/tasks.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/utils/src/task_manager/tasks.rs b/crates/utils/src/task_manager/tasks.rs index 60893dd32..3ed500c2b 100644 --- a/crates/utils/src/task_manager/tasks.rs +++ b/crates/utils/src/task_manager/tasks.rs @@ -55,13 +55,12 @@ impl TaskName { /// Returns `true` if the task is cancel safe pub(super) fn cancel_safe(self) -> bool { match self { - TaskName::HandlePropose | TaskName::AfterSync => true, + TaskName::HandlePropose | TaskName::AfterSync | TaskName::Election => true, TaskName::CompactBg | TaskName::KvUpdates | TaskName::WatchTask | TaskName::LeaseKeepAlive | TaskName::TonicServer - | TaskName::Election | TaskName::ConfChange | TaskName::GcClientLease | TaskName::RevokeExpiredLeases From b3e434aecaf599532efc8bdfa16ff133efa2eb57 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 21 Oct 2024 22:17:06 +0800 Subject: [PATCH 286/322] chore: fix clippy build_membership_response Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/member_impl.rs | 8 ++++---- crates/curp/src/server/curp_node/mod.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 7a7c6043d..263a9e470 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ 
b/crates/curp/src/server/curp_node/member_impl.rs @@ -111,7 +111,7 @@ impl, RC: RoleChange> CurpNode { Self::abort_replication(); } - self.build_membership_response(self_id, term) + Ok(self.build_membership_response(self_id, term)) } /// Builds a `ChangeMembershipResponse` from the given membership. @@ -119,7 +119,7 @@ impl, RC: RoleChange> CurpNode { &self, leader_id: u64, term: u64, - ) -> Result { + ) -> MembershipResponse { let Membership { members, nodes } = self.curp.effective_membership(); let members = members .into_iter() @@ -135,12 +135,12 @@ impl, RC: RoleChange> CurpNode { }) .collect(); - Ok(MembershipResponse { + MembershipResponse { members, nodes, term, leader_id, - }) + } } /// Wait the command with the propose id to be committed diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index d232ea78f..e4b0754a0 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -405,7 +405,7 @@ impl, RC: RoleChange> CurpNode { let (leader_id, term, _) = self.curp.leader(); let leader_id = leader_id.ok_or(CurpError::LeaderTransfer("no current leader".to_owned()))?; - self.build_membership_response(leader_id, term) + Ok(self.build_membership_response(leader_id, term)) } } From 130f885cfccaea73cdb4cc8052e4520fe60eb9fb Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 22 Oct 2024 20:47:35 +0800 Subject: [PATCH 287/322] fix: madsim tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/mod.rs | 2 +- crates/curp/src/server/curp_node/mod.rs | 5 +- crates/curp/src/server/mod.rs | 4 - crates/simulation/src/curp_group.rs | 107 ++++++++++-------- crates/simulation/src/xline_group.rs | 17 ++- .../tests/it/curp/server_recovery.rs | 26 ++--- crates/xline/src/server/xline_server.rs | 8 +- 7 files changed, 93 insertions(+), 76 deletions(-) diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs 
index 83dd877e2..77468ec69 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -353,7 +353,7 @@ impl ClientBuilder { self.init_retry_config(), keep_alive, fetch, - ClusterState::Init(cluster_state_init), + cluster_state_init, ) } } diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index e4b0754a0..7bdb8bea3 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -16,11 +16,14 @@ use tokio::{sync::oneshot, time::MissedTickBehavior}; use tonic::transport::ClientTlsConfig; use tracing::{debug, error, info, warn}; #[cfg(madsim)] +use utils::ClientTlsConfig; +#[cfg(madsim)] use utils::{ barrier::IdBarrier, config::CurpConfig, - task_manager::{tasks::TaskName, Listener, State, TaskManager}, + task_manager::{tasks::TaskName, Listener, TaskManager}, }; +#[cfg(not(madsim))] use utils::{ barrier::IdBarrier, config::CurpConfig, diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index a46d34114..6de1f6dc0 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -361,7 +361,6 @@ impl, RC: RoleChange> Rpc { #[inline] pub async fn run_from_addr( membership_info: MembershipInfo, - cluster_info: Arc, is_leader: bool, addr: std::net::SocketAddr, executor: Arc, @@ -377,7 +376,6 @@ impl, RC: RoleChange> Rpc { use utils::task_manager::tasks::TaskName; use crate::rpc::InnerProtocolServer; - use crate::rpc::MemberProtocolServer; use crate::rpc::ProtocolServer; let n = task_manager @@ -385,7 +383,6 @@ impl, RC: RoleChange> Rpc { .unwrap_or_else(|| unreachable!("cluster should never shutdown before start")); let server = Self::new( membership_info, - cluster_info, is_leader, executor, snapshot_allocator, @@ -400,7 +397,6 @@ impl, RC: RoleChange> Rpc { tonic::transport::Server::builder() .add_service(ProtocolServer::new(server.clone())) - .add_service(MemberProtocolServer::new(server.clone())) 
.add_service(InnerProtocolServer::new(server)) .serve_with_shutdown(addr, n.wait()) .await?; diff --git a/crates/simulation/src/curp_group.rs b/crates/simulation/src/curp_group.rs index aafcf627c..58204bf73 100644 --- a/crates/simulation/src/curp_group.rs +++ b/crates/simulation/src/curp_group.rs @@ -1,5 +1,5 @@ use std::{ - collections::HashMap, + collections::{BTreeMap, HashMap}, error::Error, path::PathBuf, sync::{atomic::AtomicU64, Arc}, @@ -14,14 +14,15 @@ pub use curp::rpc::{ use curp::{ client::{ClientApi, ClientBuilder}, cmd::Command, - members::{ClusterInfo, ServerId}, + member::MembershipInfo, + members::ServerId, rpc::{ - ConfChange, FetchClusterRequest, FetchClusterResponse, Member, OpResponse, - ProposeConfChangeRequest, ProposeConfChangeResponse, ReadState, + Change, ChangeMembershipRequest, FetchMembershipRequest, MembershipResponse, NodeMetadata, + OpResponse, ReadState, }, server::{ conflict::test_pools::{TestSpecPool, TestUncomPool}, - Rpc, StorageApi, DB, + Rpc, DB, }, LogIndex, }; @@ -34,6 +35,7 @@ use itertools::Itertools; use madsim::runtime::NodeHandle; use parking_lot::Mutex; use tokio::sync::mpsc; +use tonic::Response; use tracing::debug; use utils::{ config::{ClientConfig, CurpConfigBuilder, EngineConfig}, @@ -77,7 +79,20 @@ impl CurpGroup { let all: HashMap<_, _> = (0..n_nodes) .map(|x| (format!("S{x}"), vec![format!("192.168.1.{}:2380", x + 1)])) .collect(); - let mut all_members = HashMap::new(); + let all_members = (0..n_nodes) + .map(|i| (i as u64, format!("192.168.1.{}:2380", i + 1))) + .collect(); + let init_members: BTreeMap<_, _> = all + .clone() + .into_iter() + .enumerate() + .map(|(id, (name, addrs))| { + ( + id as u64, + NodeMetadata::new(name, addrs.clone(), addrs.clone()), + ) + }) + .collect(); let nodes = (0..n_nodes) .map(|i| { @@ -89,13 +104,9 @@ impl CurpGroup { let (as_tx, as_rx) = mpsc::unbounded_channel(); let store = Arc::new(Mutex::new(None)); - let cluster_info = Arc::new(ClusterInfo::from_members_map(all.clone(), 
[], &name)); - all_members = cluster_info - .all_members_peer_urls() - .into_iter() - .map(|(k, mut v)| (k, v.pop().unwrap())) - .collect(); - let id = cluster_info.self_id(); + let node_id = i as u64; + let membership_info = MembershipInfo::new(node_id, init_members.clone()); + let engine_cfg = EngineConfig::RocksDB(storage_path.clone()); let store_c = Arc::clone(&store); let role_change_cb = TestRoleChange::default(); @@ -103,9 +114,10 @@ impl CurpGroup { let node_handle = handle .create_node() - .name(id.to_string()) + .name(node_id.to_string()) .ip(format!("192.168.1.{}", i + 1).parse().unwrap()) .init(move || { + let membership_info = membership_info.clone(); let task_manager = Arc::new(TaskManager::new()); let ce = Arc::new(TestCE::new( name.clone(), @@ -125,12 +137,9 @@ impl CurpGroup { .unwrap(), ); let curp_storage = Arc::new(DB::open(&curp_config.engine_cfg).unwrap()); - let cluster_info = match curp_storage.recover_cluster_info().unwrap() { - Some(cl) => Arc::new(cl), - None => Arc::clone(&cluster_info), - }; + Rpc::run_from_addr( - cluster_info, + membership_info, is_leader, "0.0.0.0:2380".parse().unwrap(), ce, @@ -149,9 +158,9 @@ impl CurpGroup { .build(); ( - id, + node_id, CurpNode { - id, + id: node_id, addr: peer_url, handle: node_handle, exe_rx, @@ -184,16 +193,16 @@ impl CurpGroup { pub async fn new_client(&self) -> SimClient { let config = ClientConfig::default(); - let all_members = self + let addrs: Vec<_> = self .nodes - .iter() - .map(|(id, node)| (*id, vec![node.addr.clone()])) + .values() + .map(|node| vec![node.addr.clone()]) .collect(); let (client, client_id) = self .client_node .spawn(async move { ClientBuilder::new(config, true) - .all_members(all_members) + .init_nodes(addrs) .build_with_client_id() }) .await @@ -249,22 +258,24 @@ impl CurpGroup { continue; }; - let FetchClusterResponse { - leader_id, term, .. 
- } = if let Ok(resp) = client.fetch_cluster(FetchClusterRequest::default()).await - { - resp.into_inner() - } else { + let resp = client + .fetch_membership(FetchMembershipRequest::default()) + .await; + let Ok(MembershipResponse { + term, leader_id, .. + }) = resp.map(Response::into_inner) + else { continue; }; + if term > max_term { max_term = term; - leader = leader_id; + leader = Some(leader_id); } else if term == max_term && leader.is_none() { - leader = leader_id; + leader = Some(leader_id); } } - leader.map(|l| (l.into(), max_term)) + leader.map(|l| (l, max_term)) }) .await .unwrap() @@ -296,11 +307,10 @@ impl CurpGroup { continue; }; - let FetchClusterResponse { term, .. } = if let Ok(resp) = - client.fetch_cluster(FetchClusterRequest::default()).await - { - resp.into_inner() - } else { + let resp = client + .fetch_membership(FetchMembershipRequest::default()) + .await; + let Ok(MembershipResponse { term, .. }) = resp.map(Response::into_inner) else { continue; }; @@ -443,16 +453,16 @@ impl SimProtocolClient { #[inline] pub async fn propose_conf_change( &self, - conf_change: impl tonic::IntoRequest, + conf_change: impl tonic::IntoRequest, timeout: Duration, - ) -> Result, tonic::Status> { + ) -> Result, tonic::Status> { let mut req = conf_change.into_request(); req.set_timeout(timeout); let addr = self.addr.clone(); self.handle .spawn(async move { let mut client = ProtocolClient::connect(addr).await.unwrap(); - client.propose_conf_change(req).await + client.change_membership(req).await }) .await .unwrap() @@ -461,13 +471,13 @@ impl SimProtocolClient { #[inline] pub async fn fetch_cluster( &self, - ) -> Result, tonic::Status> { - let req = FetchClusterRequest::default(); + ) -> Result, tonic::Status> { + let req = FetchMembershipRequest::default(); let addr = self.addr.clone(); self.handle .spawn(async move { let mut client = ProtocolClient::connect(addr).await.unwrap(); - client.fetch_cluster(req).await + client.fetch_membership(req).await }) .await 
.unwrap() @@ -495,13 +505,10 @@ impl SimClient { } #[inline] - pub async fn propose_conf_change( - &self, - changes: Vec, - ) -> Result, tonic::Status> { + pub async fn propose_conf_change(&self, changes: Vec) -> Result<(), tonic::Status> { let inner = self.inner.clone(); self.handle - .spawn(async move { inner.propose_conf_change(changes).await }) + .spawn(async move { inner.change_membership(changes).await }) .await .unwrap() } diff --git a/crates/simulation/src/xline_group.rs b/crates/simulation/src/xline_group.rs index d3a0c41ae..b23e700d5 100644 --- a/crates/simulation/src/xline_group.rs +++ b/crates/simulation/src/xline_group.rs @@ -6,7 +6,7 @@ use tonic::transport::Channel; use tracing::debug; use utils::config::{ AuthConfig, ClientConfig, ClusterConfig, CompactConfig, CurpConfig, InitialClusterState, - ServerTimeout, StorageConfig, TlsConfig, + NodeMetaConfig, ServerTimeout, StorageConfig, TlsConfig, }; use xline::server::XlineServer; use xline_client::{ @@ -42,8 +42,21 @@ impl XlineGroup { let all: HashMap<_, _> = (0..size) .map(|x| (format!("S{x}"), vec![format!("192.168.1.{}:2380", x + 1)])) .collect(); + let membership_info: HashMap<_, _> = (0..size) + .map(|i| { + ( + format!("S{i}"), + NodeMetaConfig::new( + i as u64, + vec![format!("192.168.1.{}:2380", i + 1)], + vec![format!("192.168.1.{}:2379", i + 1)], + ), + ) + }) + .collect(); let nodes = (0..size) .map(|i| { + let membership_info = membership_info.clone(); let name = format!("S{i}"); let client_url = format!("192.168.1.{}:2379", i + 1); let peer_url = format!("192.168.1.{}:2380", i + 1); @@ -59,6 +72,8 @@ impl XlineGroup { ClientConfig::default(), ServerTimeout::default(), InitialClusterState::New, + membership_info, + i as u64, ); let handle = handle diff --git a/crates/simulation/tests/it/curp/server_recovery.rs b/crates/simulation/tests/it/curp/server_recovery.rs index 3e8c85125..009407be7 100644 --- a/crates/simulation/tests/it/curp/server_recovery.rs +++ 
b/crates/simulation/tests/it/curp/server_recovery.rs @@ -2,7 +2,9 @@ use std::{sync::Arc, time::Duration, vec}; -use curp::rpc::{ConfChange, ProposeConfChangeRequest, RecordRequest}; +use curp::rpc::{ + Change, ChangeMembershipRequest, MembershipChange, Node, NodeMetadata, RecordRequest, +}; use curp_test_utils::{init_logger, sleep_secs, test_cmd::TestCommand, TEST_TABLE}; use engine::{StorageEngine, StorageOps}; use itertools::Itertools; @@ -463,7 +465,6 @@ async fn overwritten_config_should_fallback() { let _wait_for_client_id = client .propose(TestCommand::new_put(vec![0], 0), false) .await; - let client_id = client.client_id(); let leader1 = group.get_leader().await.0; for node in group.nodes.values().filter(|node| node.id != leader1) { group.disable_node(node.id); @@ -474,20 +475,15 @@ async fn overwritten_config_should_fallback() { let node_id = 123; let address = vec!["127.0.0.1:4567".to_owned()]; - let changes = vec![ConfChange::add(node_id, address)]; + let node = Node::new( + node_id, + NodeMetadata::new(format!("S{node_id}"), address.clone(), address), + ); + let changes = vec![MembershipChange { + change: Some(Change::Add(node)), + }]; let res = leader_conn - .propose_conf_change( - ProposeConfChangeRequest { - propose_id: Some(PbProposeId { - client_id, - // start from 1 as we already propose an put with seq_num = 0 - seq_num: 1, - }), - changes, - cluster_version: cluster.cluster_version, - }, - Duration::from_secs(3), - ) + .propose_conf_change(ChangeMembershipRequest { changes }, Duration::from_secs(3)) .await; assert_eq!(res.unwrap_err().code(), Code::DeadlineExceeded); let cluster = leader_conn.fetch_cluster().await.unwrap().into_inner(); diff --git a/crates/xline/src/server/xline_server.rs b/crates/xline/src/server/xline_server.rs index 62c558c39..1542abd55 100644 --- a/crates/xline/src/server/xline_server.rs +++ b/crates/xline/src/server/xline_server.rs @@ -21,6 +21,7 @@ use tonic::transport::{ server::Connected, Certificate, ClientTlsConfig, 
Identity, ServerTlsConfig, }; use tonic::transport::{server::Router, Server}; +#[cfg(not(madsim))] use tracing::info; use utils::{ barrier::IdBarrier, @@ -92,6 +93,7 @@ pub struct XlineServer { } impl XlineServer { + #[cfg_attr(madsim, allow(clippy::unused_async))] /// New `XlineServer` /// /// # Errors @@ -308,7 +310,7 @@ impl XlineServer { let n2 = n1.clone(); let db = DB::open(&self.storage_config.engine)?; let key_pair = Self::read_key_pair(&self.auth_config).await?; - let (xline_router, curp_router, curp_client) = self.init_router(db, key_pair).await?; + let (xline_router, curp_router, _curp_client) = self.init_router(db, key_pair).await?; let handle = tokio::spawn(async move { tokio::select! { _ = xline_router.serve_with_shutdown(xline_addr, n1.wait()) => {}, @@ -316,9 +318,7 @@ impl XlineServer { } Ok(()) }); - if let Err(e) = self.publish(curp_client).await { - warn!("publish name to cluster failed: {:?}", e); - }; + Ok(handle) } From 72880a7a608758830916d070df941d6b0f3f8790 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 23 Oct 2024 10:43:29 +0800 Subject: [PATCH 288/322] fix: merge vote and pre-vote failure result Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 20 ++++++++------------ crates/curp/src/server/raw_curp/mod.rs | 2 +- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 7bdb8bea3..a0e6f6848 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -704,11 +704,8 @@ impl, RC: RoleChange> CurpNode { ); let result = Self::bcast_vote(curp.as_ref(), vote).await; debug_assert!( - matches!( - result, - BCastVoteResult::VoteSuccess | BCastVoteResult::VoteFail - ), - "bcast normal vote should always return Vote variants" + matches!(result, BCastVoteResult::VoteSuccess | BCastVoteResult::Fail), + "bcast normal 
vote should always return Vote variants, result: {result:?}" ); if matches!(result, BCastVoteResult::VoteSuccess) { Self::respawn_replication(Arc::clone(&curp)); @@ -906,7 +903,7 @@ impl, RC: RoleChange> CurpNode { if vote.is_pre_vote { if resp.shutdown_candidate { curp.task_manager().shutdown(false).await; - return BCastVoteResult::PreVoteFail; + return BCastVoteResult::Fail; } let result = curp.handle_pre_vote_resp(id, resp.term, resp.vote_granted); match result { @@ -927,12 +924,12 @@ impl, RC: RoleChange> CurpNode { match result { Ok(false) => {} Ok(true) => return BCastVoteResult::VoteSuccess, - Err(()) => return BCastVoteResult::VoteFail, + Err(()) => return BCastVoteResult::Fail, } }; } - BCastVoteResult::PreVoteFail + BCastVoteResult::Fail } /// Get `RawCurp` @@ -951,15 +948,14 @@ impl, RC: RoleChange> Debug for CurpNode { } /// Invoked by candidates to gather votes -#[derive(Clone)] +#[derive(Debug, Clone)] pub(super) struct Vote { /// Candidate's term pub(super) term: u64, From 53d84039908570a2ad55a52e9bd3f818b7c96921 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 23 Oct 2024 16:02:26 +0800 Subject: [PATCH 289/322] test: add membership madsim tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- Cargo.lock | 1 + .../curp/src/server/curp_node/member_impl.rs | 4 + crates/simulation/Cargo.toml | 1 + crates/simulation/src/curp_group.rs | 114 ++++++- crates/simulation/tests/it/curp/membership.rs | 303 ++++++++++++++++++ crates/simulation/tests/it/curp/mod.rs | 1 + .../tests/it/curp/server_recovery.rs | 2 +- 7 files changed, 410 insertions(+), 16 deletions(-) create mode 100644 crates/simulation/tests/it/curp/membership.rs diff --git a/Cargo.lock b/Cargo.lock index a0f313e9a..ba7660316 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2688,6 +2688,7 @@ dependencies = [ "parking_lot", "prost", "tempfile", + "test-macros", "tracing", "utils", "workspace-hack", diff --git 
a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 263a9e470..446729cbe 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -102,6 +102,10 @@ impl, RC: RoleChange> CurpNode { self.curp.persistent_membership_state()?; // Leader also needs to update transferee self.curp.update_transferee(); + #[cfg(madsim)] // simulate slow commit + { + madsim::time::sleep(std::time::Duration::from_secs(5)).await; + } self.wait_commit(Some(propose_id)).await; } diff --git a/crates/simulation/Cargo.toml b/crates/simulation/Cargo.toml index bbc988f05..1d8a7f823 100644 --- a/crates/simulation/Cargo.toml +++ b/crates/simulation/Cargo.toml @@ -22,6 +22,7 @@ madsim = "0.2.27" parking_lot = "0.12.3" prost = "0.13" tempfile = "3" +test-macros = { path = "../test-macros" } tokio = { version = "0.2.25", package = "madsim-tokio", features = [ "rt", "rt-multi-thread", diff --git a/crates/simulation/src/curp_group.rs b/crates/simulation/src/curp_group.rs index 58204bf73..e1f35cabc 100644 --- a/crates/simulation/src/curp_group.rs +++ b/crates/simulation/src/curp_group.rs @@ -76,23 +76,20 @@ impl CurpGroup { assert!(n_nodes >= 3, "the number of nodes must >= 3"); let handle = madsim::runtime::Handle::current(); - let all: HashMap<_, _> = (0..n_nodes) - .map(|x| (format!("S{x}"), vec![format!("192.168.1.{}:2380", x + 1)])) - .collect(); - let all_members = (0..n_nodes) - .map(|i| (i as u64, format!("192.168.1.{}:2380", i + 1))) - .collect(); - let init_members: BTreeMap<_, _> = all - .clone() - .into_iter() - .enumerate() - .map(|(id, (name, addrs))| { + let init_members: BTreeMap<_, _> = (0..n_nodes) + .map(|id| { + let addrs = vec![format!("192.168.1.{}:2380", id + 1)]; ( id as u64, - NodeMetadata::new(name, addrs.clone(), addrs.clone()), + NodeMetadata::new(format!("S{id}"), addrs.clone(), addrs), ) }) .collect(); + let all_members = init_members + .clone() + .into_iter() + 
.map(|(id, meta)| (id, meta.peer_urls()[0].clone())) + .collect(); let nodes = (0..n_nodes) .map(|i| { @@ -187,6 +184,84 @@ impl CurpGroup { } } + pub fn run_node(&mut self, id: u64) { + let handle = madsim::runtime::Handle::current(); + let name = format!("S{id}"); + let peer_url = format!("192.168.1.{}:2380", id + 1); + let storage_path = tempfile::tempdir().unwrap().into_path(); + + let (exe_tx, exe_rx) = mpsc::unbounded_channel(); + let (as_tx, as_rx) = mpsc::unbounded_channel(); + let store = Arc::new(Mutex::new(None)); + + let node_id = id as u64; + let membership_info = MembershipInfo::new(node_id, BTreeMap::default()); + + let engine_cfg = EngineConfig::RocksDB(storage_path.clone()); + let store_c = Arc::clone(&store); + let role_change_cb = TestRoleChange::default(); + let role_change_arc = role_change_cb.get_inner_arc(); + + let node_handle = handle + .create_node() + .name(node_id.to_string()) + .ip(format!("192.168.1.{}", id + 1).parse().unwrap()) + .init(move || { + let membership_info = membership_info.clone(); + let task_manager = Arc::new(TaskManager::new()); + let ce = Arc::new(TestCE::new( + name.clone(), + exe_tx.clone(), + as_tx.clone(), + EngineConfig::Memory, + )); + store_c.lock().replace(Arc::clone(&ce.store)); + // we will restart the old leader. + // after the reboot, it may no longer be the leader. 
+ let is_leader = false; + let curp_config = Arc::new( + CurpConfigBuilder::default() + .engine_cfg(engine_cfg.clone()) + .log_entries_cap(10) + .build() + .unwrap(), + ); + let curp_storage = Arc::new(DB::open(&curp_config.engine_cfg).unwrap()); + + Rpc::run_from_addr( + membership_info, + is_leader, + "0.0.0.0:2380".parse().unwrap(), + ce, + Box::new(MemorySnapshotAllocator), + TestRoleChange { + inner: role_change_cb.get_inner_arc(), + }, + curp_config, + curp_storage, + task_manager, + None, + vec![Box::::default()], + vec![Box::::default()], + ) + }) + .build(); + + let node = CurpNode { + id, + addr: peer_url.clone(), + handle: node_handle, + exe_rx, + as_rx, + store, + storage_path, + role_change_arc, + }; + + assert!(self.nodes.insert(id, node).is_none()); + assert!(self.all_members.insert(id, peer_url).is_none()); + } + pub fn get_node(&self, id: &ServerId) -> &CurpNode { &self.nodes[id] } @@ -226,7 +301,7 @@ impl CurpGroup { self.nodes.values_mut().map(|node| &mut node.as_rx) } - pub async fn crash(&mut self, id: ServerId) { + pub async fn crash(&self, id: ServerId) { let handle = madsim::runtime::Handle::current(); handle.kill(id.to_string()); madsim::time::sleep(Duration::from_secs(10)).await; @@ -235,7 +310,7 @@ impl CurpGroup { } } - pub async fn restart(&mut self, id: ServerId) { + pub async fn restart(&self, id: ServerId) { let handle = madsim::runtime::Handle::current(); handle.restart(id.to_string()); } @@ -505,7 +580,7 @@ impl SimClient { } #[inline] - pub async fn propose_conf_change(&self, changes: Vec) -> Result<(), tonic::Status> { + pub async fn change_membership(&self, changes: Vec) -> Result<(), tonic::Status> { let inner = self.inner.clone(); self.handle .spawn(async move { inner.change_membership(changes).await }) @@ -532,6 +607,15 @@ impl SimClient { .unwrap() } + #[inline] + pub async fn fetch_cluster(&self) -> Result { + let inner = self.inner.clone(); + self.handle + .spawn(async move { inner.fetch_cluster(true).await }) + .await + 
.unwrap() + } + #[inline] pub fn client_id(&self) -> u64 { self.client_id.load(std::sync::atomic::Ordering::Relaxed) diff --git a/crates/simulation/tests/it/curp/membership.rs b/crates/simulation/tests/it/curp/membership.rs new file mode 100644 index 000000000..1f070c263 --- /dev/null +++ b/crates/simulation/tests/it/curp/membership.rs @@ -0,0 +1,303 @@ +use std::{pin::Pin, time::Duration}; + +use curp::rpc::{Change, Node, NodeMetadata}; +use curp_test_utils::{init_logger, test_cmd::TestCommand}; +use futures::{Future, FutureExt}; +use itertools::Itertools; +use madsim::rand::{self, seq::IteratorRandom, Rng}; +use simulation::curp_group::{CurpGroup, SimClient}; +use test_macros::abort_on_panic; + +fn spawn_change_membership( + client: SimClient, + change: Change, +) -> Pin>> { + let handle = tokio::spawn(async move { + while let Err(err) = client.change_membership(vec![change.clone()]).await { + eprintln!("change membership error: {err}"); + if err.code() == tonic::Code::FailedPrecondition { + break; + } + } + }); + Box::pin(handle.map(|r| r.unwrap())) +} + +async fn with_fault_injection( + change: ChangeFut, + fault: Fault, + recovery: Recovery, +) where + ChangeFut: Future, + Fault: Future, + Recovery: Future, +{ + // yield so that other task may run + madsim::task::yield_now().await; + eprintln!("injecting fault"); + fault.await; + change.await; + eprintln!("recovering"); + recovery.await; +} + +async fn with_fault_injection_and_early_recovery( + change: ChangeFut, + fault: Fault, + recovery: Recovery, +) where + ChangeFut: Future, + Fault: Future, + Recovery: Future, +{ + // yield so that other task may run + madsim::task::yield_now().await; + eprintln!("injecting fault"); + fault.await; + madsim::time::sleep(Duration::from_secs(10)).await; + eprintln!("recovering"); + recovery.await; + change.await; +} + +async fn get_leader(group: &CurpGroup) -> u64 { + group + .new_client() + .await + .fetch_cluster() + .await + .unwrap() + .leader_id +} + +async fn 
assert_membership(group: &CurpGroup, id: u64, meta: NodeMetadata, is_voter: bool) { + let new_membership = group.new_client().await.fetch_cluster().await.unwrap(); + assert!(new_membership + .nodes + .into_iter() + .any(|n| n.node_id == id && n.meta.unwrap() == meta)); + + assert_eq!( + new_membership + .members + .into_iter() + .any(|s| s.set.contains(&id)), + is_voter + ); +} + +async fn assert_non_exist(group: &CurpGroup, id: u64) { + let new_membership = group.new_client().await.fetch_cluster().await.unwrap(); + assert!(!new_membership.nodes.into_iter().any(|n| n.node_id == id)); +} + +#[abort_on_panic] +#[madsim::test] +async fn membership_change_with_reelection() { + init_logger(); + let mut group = CurpGroup::new(5).await; + let meta = NodeMetadata::new( + "new", + vec!["192.168.1.6:2380".to_owned()], + vec!["192.168.1.6:2379".to_owned()], + ); + let node = Node::new(5, meta.clone()); + group.run_node(5); + + let leader0 = get_leader(&group).await; + with_fault_injection( + spawn_change_membership(group.new_client().await, Change::Add(node)), + async { group.disable_node(leader0) }, + async { group.enable_node(leader0) }, + ) + .await; + assert_membership(&group, 5, meta.clone(), false).await; + + let leader1 = get_leader(&group).await; + with_fault_injection( + spawn_change_membership(group.new_client().await, Change::Promote(5)), + async { group.disable_node(leader1) }, + async { group.enable_node(leader1) }, + ) + .await; + assert_membership(&group, 5, meta.clone(), true).await; + + let leader2 = get_leader(&group).await; + with_fault_injection( + spawn_change_membership(group.new_client().await, Change::Demote(5)), + async { group.disable_node(leader2) }, + async { group.enable_node(leader2) }, + ) + .await; + assert_membership(&group, 5, meta.clone(), false).await; + + let leader3 = get_leader(&group).await; + with_fault_injection( + spawn_change_membership(group.new_client().await, Change::Remove(5)), + async { group.disable_node(leader3) }, + async { 
group.enable_node(leader3) }, + ) + .await; + assert_non_exist(&group, 5).await; +} + +#[madsim::test] +async fn membership_change_with_partition_minority() { + init_logger(); + let mut group = CurpGroup::new(5).await; + let meta = NodeMetadata::new( + "new", + vec!["192.168.1.6:2380".to_owned()], + vec!["192.168.1.6:2379".to_owned()], + ); + let node = Node::new(5, meta.clone()); + group.run_node(5); + + let ids = [0, 1, 2, 3, 4]; + let mut rng = rand::thread_rng(); + let mut get_minority = || ids.iter().combinations(2).choose(&mut rng).unwrap(); + + let minority = get_minority(); + eprintln!("disabling minority: {minority:?}"); + with_fault_injection( + spawn_change_membership(group.new_client().await, Change::Add(node.clone())), + async { minority.iter().for_each(|id| group.disable_node(**id)) }, + async { minority.iter().for_each(|id| group.enable_node(**id)) }, + ) + .await; + assert_membership(&group, 5, meta.clone(), false).await; + + let minority = get_minority(); + eprintln!("disabling minority: {minority:?}"); + with_fault_injection( + spawn_change_membership(group.new_client().await, Change::Promote(5)), + async { minority.iter().for_each(|id| group.disable_node(**id)) }, + async { minority.iter().for_each(|id| group.enable_node(**id)) }, + ) + .await; + assert_membership(&group, 5, meta.clone(), true).await; +} + +#[madsim::test] +async fn membership_change_with_partition_majority() { + init_logger(); + let mut group = CurpGroup::new(5).await; + let meta = NodeMetadata::new( + "new", + vec!["192.168.1.6:2380".to_owned()], + vec!["192.168.1.6:2379".to_owned()], + ); + let node = Node::new(5, meta.clone()); + group.run_node(5); + let ids = [0, 1, 2, 3, 4]; + let mut rng = rand::thread_rng(); + let mut get_majority = || { + ids.iter() + .combinations(rng.gen_range(3..=5)) + .choose(&mut rng) + .unwrap() + }; + + let majority = get_majority(); + eprintln!("disabling majority: {majority:?}"); + with_fault_injection_and_early_recovery( + 
spawn_change_membership(group.new_client().await, Change::Add(node.clone())), + async { majority.iter().for_each(|id| group.disable_node(**id)) }, + async { majority.iter().for_each(|id| group.enable_node(**id)) }, + ) + .await; + assert_membership(&group, 5, meta.clone(), false).await; + + let majority = get_majority(); + eprintln!("disabling majority: {majority:?}"); + with_fault_injection_and_early_recovery( + spawn_change_membership(group.new_client().await, Change::Promote(5)), + async { majority.iter().for_each(|id| group.disable_node(**id)) }, + async { majority.iter().for_each(|id| group.enable_node(**id)) }, + ) + .await; + assert_membership(&group, 5, meta.clone(), true).await; +} + +#[madsim::test] +async fn membership_change_with_crash_leader() { + init_logger(); + let mut group = CurpGroup::new(5).await; + let meta = NodeMetadata::new( + "new", + vec!["192.168.1.6:2380".to_owned()], + vec!["192.168.1.6:2379".to_owned()], + ); + let node = Node::new(5, meta.clone()); + group.run_node(5); + + let leader = get_leader(&group).await; + with_fault_injection( + spawn_change_membership(group.new_client().await, Change::Add(node.clone())), + group.crash(leader), + group.restart(leader), + ) + .await; + assert_membership(&group, 5, meta.clone(), false).await; + + let leader = get_leader(&group).await; + with_fault_injection( + spawn_change_membership(group.new_client().await, Change::Promote(5)), + group.crash(leader), + group.restart(leader), + ) + .await; + assert_membership(&group, 5, meta.clone(), true).await; +} + +#[madsim::test] +async fn membership_change_with_crash_minority() { + init_logger(); + let mut group = CurpGroup::new(5).await; + let meta = NodeMetadata::new( + "new", + vec!["192.168.1.6:2380".to_owned()], + vec!["192.168.1.6:2379".to_owned()], + ); + let node = Node::new(5, meta.clone()); + group.run_node(5); + let ids = [0, 1, 2, 3, 4]; + let mut rng = rand::thread_rng(); + let mut get_minority = || ids.iter().combinations(2).choose(&mut 
rng).unwrap(); + + let minority = get_minority(); + eprintln!("disabling minority: {minority:?}"); + with_fault_injection( + spawn_change_membership(group.new_client().await, Change::Add(node.clone())), + async { + for id in &minority { + group.crash(**id).await; + } + }, + async { + for id in &minority { + group.restart(**id).await; + } + }, + ) + .await; + assert_membership(&group, 5, meta.clone(), false).await; + + let minority = get_minority(); + eprintln!("disabling minority: {minority:?}"); + with_fault_injection( + spawn_change_membership(group.new_client().await, Change::Promote(5)), + async { + for id in &minority { + group.crash(**id).await; + } + }, + async { + for id in &minority { + group.restart(**id).await; + } + }, + ) + .await; + assert_membership(&group, 5, meta.clone(), true).await; +} diff --git a/crates/simulation/tests/it/curp/mod.rs b/crates/simulation/tests/it/curp/mod.rs index 2f3cfeb70..9c149b237 100644 --- a/crates/simulation/tests/it/curp/mod.rs +++ b/crates/simulation/tests/it/curp/mod.rs @@ -1,2 +1,3 @@ +mod membership; mod server_election; mod server_recovery; diff --git a/crates/simulation/tests/it/curp/server_recovery.rs b/crates/simulation/tests/it/curp/server_recovery.rs index 009407be7..215450dc0 100644 --- a/crates/simulation/tests/it/curp/server_recovery.rs +++ b/crates/simulation/tests/it/curp/server_recovery.rs @@ -334,7 +334,7 @@ async fn minority_crash_and_recovery() { const NODES: usize = 9; const MINORITY: usize = (NODES - 1) / 2; - let mut group = CurpGroup::new(NODES).await; + let group = CurpGroup::new(NODES).await; let client = group.new_client().await; From af583a9f6ccb11b28b07c59cfa9bed8def7e791d Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 24 Oct 2024 11:29:17 +0800 Subject: [PATCH 290/322] test: fix madsim tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/simulation/tests/it/curp/server_recovery.rs | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/crates/simulation/tests/it/curp/server_recovery.rs b/crates/simulation/tests/it/curp/server_recovery.rs index 215450dc0..7c3412c23 100644 --- a/crates/simulation/tests/it/curp/server_recovery.rs +++ b/crates/simulation/tests/it/curp/server_recovery.rs @@ -471,7 +471,7 @@ async fn overwritten_config_should_fallback() { } let leader_conn = group.get_connect(&leader1).await; let cluster = leader_conn.fetch_cluster().await.unwrap().into_inner(); - assert_eq!(cluster.members.len(), 5); + assert_eq!(cluster.nodes.len(), 5); let node_id = 123; let address = vec!["127.0.0.1:4567".to_owned()]; @@ -487,7 +487,7 @@ async fn overwritten_config_should_fallback() { .await; assert_eq!(res.unwrap_err().code(), Code::DeadlineExceeded); let cluster = leader_conn.fetch_cluster().await.unwrap().into_inner(); - assert_eq!(cluster.members.len(), 6); + assert_eq!(cluster.nodes.len(), 6); group.disable_node(leader1); for node in group.nodes.values().filter(|node| node.id != leader1) { @@ -508,5 +508,5 @@ async fn overwritten_config_should_fallback() { // wait fallback sleep_secs(3).await; let cluster = leader_conn.fetch_cluster().await.unwrap().into_inner(); - assert_eq!(cluster.members.len(), 5); + assert_eq!(cluster.nodes.len(), 5); } From 138fe34c3a0a8bec8a8cb2668413cbb69640c29c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 24 Oct 2024 11:29:47 +0800 Subject: [PATCH 291/322] fix: membership not truncate on append entries Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index a0e6f6848..a8bf4fe29 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -482,13 +482,13 @@ impl, RC: RoleChange> CurpNode { 
.put_log_entries(&to_persist.iter().map(Arc::as_ref).collect::>())?; if let Some((_, config)) = membership_entries.last() { self.update_states_with_membership(config); - self.curp.update_membership_state( - truncate_at, - membership_entries, - Some(leader_commit), - ); - self.curp.persistent_membership_state()?; } + self.curp.update_membership_state( + truncate_at, + membership_entries, + Some(leader_commit), + ); + self.curp.persistent_membership_state()?; AppendEntriesResponse::new_accept(term) } Err((term, hint)) => AppendEntriesResponse::new_reject(term, hint), From bf35fe8cc62f4a02f44f94b53abe2e07d5f528bc Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:19:23 +0800 Subject: [PATCH 292/322] refactor: merge update_membership_state and persistent_membership_state Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/server/curp_node/member_impl.rs | 27 +++++++++-- crates/curp/src/server/curp_node/mod.rs | 10 +--- .../curp/src/server/raw_curp/member_impl.rs | 48 ++++++++++++------- crates/curp/src/server/raw_curp/mod.rs | 7 ++- crates/curp/src/server/raw_curp/tests.rs | 2 +- 5 files changed, 60 insertions(+), 34 deletions(-) diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 446729cbe..3a05984c5 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -95,11 +95,8 @@ impl, RC: RoleChange> CurpNode { for config in configs { let propose_id = ProposeId(rand::random(), 0); let index = self.curp.push_log_entry(propose_id, config.clone()).index; - self.update_states_with_membership(&config); - self.curp - .update_membership_state(None, Some((index, config)), None); + self.update_membership(None, Some((index, config)), None)?; Self::respawn_replication(Arc::clone(&self.curp)); - self.curp.persistent_membership_state()?; // Leader also needs to update transferee 
self.curp.update_transferee(); #[cfg(madsim)] // simulate slow commit @@ -186,8 +183,28 @@ impl, RC: RoleChange> CurpNode { // Common methods shared by both leader and followers impl, RC: RoleChange> CurpNode { + /// Updates the membership state and all relevant states + pub(crate) fn update_membership( + &self, + truncate: Option, + append: Entries, + commit: Option, + ) -> Result<(), CurpError> + where + Entries: IntoIterator, + { + let update = self + .curp + .update_membership_state(truncate, append, commit)?; + if let Some(config) = update { + self.update_states_with_membership(&config); + } + + Ok(()) + } + /// Updates the membership config - pub(crate) fn update_states_with_membership(&self, membership: &Membership) { + fn update_states_with_membership(&self, membership: &Membership) { let connects = self.connect_nodes(membership); let _new_states = self.curp.update_node_states(connects); self.curp.update_role(membership); diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index a8bf4fe29..01fecfa1a 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -480,15 +480,7 @@ impl, RC: RoleChange> CurpNode { Ok((term, truncate_at, to_persist)) => { self.storage .put_log_entries(&to_persist.iter().map(Arc::as_ref).collect::>())?; - if let Some((_, config)) = membership_entries.last() { - self.update_states_with_membership(config); - } - self.curp.update_membership_state( - truncate_at, - membership_entries, - Some(leader_commit), - ); - self.curp.persistent_membership_state()?; + self.update_membership(truncate_at, membership_entries, Some(leader_commit))?; AppendEntriesResponse::new_accept(term) } Err((term, hint)) => AppendEntriesResponse::new_reject(term, hint), diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index a4b9b2de9..2c18edf25 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ 
b/crates/curp/src/server/raw_curp/member_impl.rs @@ -55,30 +55,32 @@ impl RawCurp { truncate: Option, append: Entries, commit: Option, - ) where + ) -> Result, StorageError> + where Entries: IntoIterator, { + let mut updated = false; let mut ms_w = self.ms.write(); + if let Some(index) = truncate { ms_w.cluster_mut().truncate(index); + updated = true; } for (index, config) in append { ms_w.cluster_mut().append(index, config); + updated = true; } if let Some(index) = commit { ms_w.cluster_mut().commit(index); } - } - /// Persists the current membership state to storage. - /// - /// This method should only be called when new entries are appended to the membership state. - pub(crate) fn persistent_membership_state(&self) -> Result<(), StorageError> { - let (node_id, membership_state) = - self.ms.map_read(|ms| (ms.node_id(), ms.cluster().clone())); - self.ctx - .curp_storage - .put_membership(node_id, &membership_state) + if updated { + self.ctx + .curp_storage + .put_membership(ms_w.node_id(), ms_w.cluster())?; + } + + Ok(updated.then_some(ms_w.cluster().effective().clone())) } /// Updates the node states @@ -145,15 +147,21 @@ mod test { (0..5).map(|id| (id, NodeMetadata::default())).collect(), ); - curp.update_membership_state(None, [(1, membership1.clone())], None); + let _ignore = curp + .update_membership_state(None, [(1, membership1.clone())], None) + .unwrap(); assert_eq!(*curp.ms.read().cluster().effective(), membership1); - curp.update_membership_state(None, [(2, membership2.clone())], None); + let _ignore = curp + .update_membership_state(None, [(2, membership2.clone())], None) + .unwrap(); assert_eq!(*curp.ms.read().cluster().effective(), membership2); - curp.update_membership_state(Some(1), [], None); + let _ignore = curp.update_membership_state(Some(1), [], None).unwrap(); assert_eq!(*curp.ms.read().cluster().effective(), membership1); - curp.update_membership_state(None, [(2, membership2.clone())], None); - curp.update_membership_state(None, [], Some(2)); 
+ let _ignore = curp + .update_membership_state(None, [(2, membership2.clone())], None) + .unwrap(); + let _ignore = curp.update_membership_state(None, [], Some(2)).unwrap(); assert_eq!(*curp.ms.read().cluster().effective(), membership2); assert_eq!(*curp.ms.read().cluster().committed(), membership2); } @@ -173,12 +181,16 @@ mod test { ); // remove from membership - curp.update_membership_state(None, [(1, membership1.clone())], None); + let _ignore = curp + .update_membership_state(None, [(1, membership1.clone())], None) + .unwrap(); curp.update_role(&membership1); assert_eq!(curp.st.read().role, Role::Learner); // add back - curp.update_membership_state(None, [(2, membership2.clone())], None); + let _ignore = curp + .update_membership_state(None, [(2, membership2.clone())], None) + .unwrap(); curp.update_role(&membership2); assert_eq!(curp.st.read().role, Role::Follower); } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 3514bc452..1f5418a93 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -835,7 +835,12 @@ impl RawCurp { let mut log_w = RwLockUpgradableReadGuard::upgrade(log_r); if index > log_w.commit_index { log_w.commit_to(index); - self.update_membership_state(None, None, Some(index)); + // update commit index won't update the storage + debug_assert!( + self.update_membership_state(None, None, Some(index)) + .is_ok(), + "failed to update membership state" + ); debug!("{} updates commit index to {index}", self.id()); self.apply(&mut *log_w); } diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index c9187d185..f36a01cf5 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -663,7 +663,7 @@ fn add_learner_node_and_promote_should_success() { .flatten() .any(|id| *id == 3)); curp.log.write().commit_to(1); - curp.update_membership_state(None, None, Some(1)); + let 
_ignore = curp.update_membership_state(None, None, Some(1)).unwrap(); let membership = curp .generate_membership(Some(Change::Promote(3))) .pop() From bf8cfbfa0f990c56c768329f688e7a30471ae00b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 24 Oct 2024 23:15:13 +0800 Subject: [PATCH 293/322] test: add membership idempotent test Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/member.rs | 49 +++++++++---------- .../curp/src/server/curp_node/member_impl.rs | 8 +-- .../curp/src/server/raw_curp/member_impl.rs | 2 +- crates/curp/src/server/raw_curp/tests.rs | 11 +++-- crates/curp/tests/it/server.rs | 45 +++++++++++++++++ 5 files changed, 82 insertions(+), 33 deletions(-) diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index e37c13ea4..4b8cf3e2d 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -198,13 +198,13 @@ impl MembershipState { /// Generates a new membership from `Change` /// /// Returns an empty `Vec` if there's an on-going membership change - pub(crate) fn changes(&self, changes: Changes) -> Vec + pub(crate) fn changes(&self, changes: Changes) -> Option> where Changes: IntoIterator, { // membership uncommitted, return an empty vec if self.entries.len() != 1 { - return vec![]; + return None; } self.last().membership.changes(changes) } @@ -261,7 +261,7 @@ impl Membership { /// Generates a new membership from `Change` /// /// Returns `None` if the change is invalid - pub(crate) fn changes(&self, changes: Changes) -> Vec + pub(crate) fn changes(&self, changes: Changes) -> Option> where Changes: IntoIterator, { @@ -272,24 +272,26 @@ impl Membership { match change { Change::Add(node) => { let (id, meta) = node.into_parts(); - if nodes.insert(id, meta).is_some() { - return vec![]; + if set.contains(&id) { + return None; } + let _ignore = nodes.insert(id, meta); } Change::Remove(id) => { - if nodes.remove(&id).is_none() { - return vec![]; + if 
set.contains(&id) { + return None; } + let _ignore = nodes.remove(&id).is_none(); } Change::Promote(id) => { - if self.is_current_member(id) { - return vec![]; + if !nodes.contains_key(&id) { + return None; } let _ignore = set.insert(id); } Change::Demote(id) => { - if !self.is_current_member(id) { - return vec![]; + if !nodes.contains_key(&id) { + return None; } let _ignore = set.remove(&id); } @@ -301,7 +303,7 @@ impl Membership { nodes, }; - Self::all_coherent(self.clone(), &target) + Some(Self::all_coherent(self.clone(), &target)) } /// Gets the current member set @@ -310,11 +312,6 @@ impl Membership { self.members.last().unwrap() } - /// Returns `true` if the given id exists in the current member set - fn is_current_member(&self, id: u64) -> bool { - self.current_member_set().contains(&id) - } - /// Generates all coherent membership to reach the target fn all_coherent(current: Self, target: &Self) -> Vec { let next = |curr: &Self| { @@ -507,15 +504,16 @@ mod tests { } }; - let changes = - membership_state.changes([Change::Add(Node::new(2, NodeMetadata::default()))]); + let changes = membership_state + .changes([Change::Add(Node::new(2, NodeMetadata::default()))]) + .unwrap(); assert_eq!( changes, vec![build_membership_with_learners([vec![1]], [2])] ); apply_changes(&mut membership_state, changes.clone()); - let changes = membership_state.changes([Change::Promote(2)]); + let changes = membership_state.changes([Change::Promote(2)]).unwrap(); assert_eq!( changes, vec![ @@ -525,7 +523,7 @@ mod tests { ); apply_changes(&mut membership_state, changes.clone()); - let changes = membership_state.changes([Change::Demote(2)]); + let changes = membership_state.changes([Change::Demote(2)]).unwrap(); assert_eq!( changes, vec![ @@ -535,7 +533,7 @@ mod tests { ); apply_changes(&mut membership_state, changes.clone()); - let changes = membership_state.changes([Change::Remove(2)]); + let changes = membership_state.changes([Change::Remove(2)]).unwrap(); assert_eq!(changes, 
vec![build_membership([vec![1]])]); apply_changes(&mut membership_state, changes.clone()); } @@ -544,14 +542,15 @@ mod tests { fn test_membership_changes_reject_uncommitted() { let mut index = 1; let mut membership_state = MembershipState::new(build_membership([vec![1]])); - let changes = - membership_state.changes([Change::Add(Node::new(2, NodeMetadata::default()))]); + let changes = membership_state + .changes([Change::Add(Node::new(2, NodeMetadata::default()))]) + .unwrap(); for change in changes { // append but not committed membership_state.append(index, change); index += 1; } - assert!(membership_state.changes([Change::Promote(2)]).is_empty()); + assert!(membership_state.changes([Change::Promote(2)]).is_none()); } } diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 3a05984c5..7d04ffbee 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -88,10 +88,10 @@ impl, RC: RoleChange> CurpNode { self.ensure_leader()?; let (self_id, term) = (self.curp.id(), self.curp.term()); let changes = Self::ensure_non_overlapping(changes)?; - let configs = self.curp.generate_membership(changes); - if configs.is_empty() { - return Err(CurpError::invalid_member_change()); - } + let configs = self + .curp + .generate_membership(changes) + .ok_or(CurpError::invalid_member_change())?; for config in configs { let propose_id = ProposeId(rand::random(), 0); let index = self.curp.push_log_entry(propose_id, config.clone()).index; diff --git a/crates/curp/src/server/raw_curp/member_impl.rs b/crates/curp/src/server/raw_curp/member_impl.rs index 2c18edf25..f29166fab 100644 --- a/crates/curp/src/server/raw_curp/member_impl.rs +++ b/crates/curp/src/server/raw_curp/member_impl.rs @@ -23,7 +23,7 @@ use super::Role; // Leader methods impl RawCurp { /// Generate memberships based on the provided change - pub(crate) fn generate_membership(&self, changes: Changes) -> Vec 
+ pub(crate) fn generate_membership(&self, changes: Changes) -> Option> where Changes: IntoIterator, { diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index f36a01cf5..34ee29a45 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -653,6 +653,7 @@ fn add_learner_node_and_promote_should_success() { let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let membership = curp .generate_membership(Some(Change::Add(Node::new(3, NodeMetadata::default())))) + .unwrap() .pop() .unwrap(); let _ignore = curp.update_membership_state(None, Some((1, membership)), None); @@ -666,6 +667,7 @@ fn add_learner_node_and_promote_should_success() { let _ignore = curp.update_membership_state(None, None, Some(1)).unwrap(); let membership = curp .generate_membership(Some(Change::Promote(3))) + .unwrap() .pop() .unwrap(); let _ignore = curp.update_membership_state(None, Some((2, membership)), None); @@ -688,9 +690,11 @@ fn add_exists_node_should_have_no_effect() { exists_node_id, NodeMetadata::default(), )))) + .unwrap() .is_empty()); assert!(curp .generate_membership(Some(Change::Promote(exists_node_id))) + .unwrap() .is_empty()); } @@ -702,6 +706,7 @@ fn remove_node_should_remove_node_from_curp() { let follower_id = curp.get_id_by_name("S1").unwrap(); let membership = curp .generate_membership(Some(Change::Demote(follower_id))) + .unwrap() .pop() .unwrap(); let _ignore = curp.update_membership_state(None, Some((1, membership)), None); @@ -721,10 +726,9 @@ fn remove_non_exists_node_should_have_no_effect() { let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; assert!(curp .generate_membership(Some(Change::Remove(10))) + .unwrap() .is_empty()); - assert!(curp - .generate_membership(Some(Change::Demote(10))) - .is_empty()); + assert!(curp.generate_membership(Some(Change::Demote(10))).is_none()); } #[traced_test] @@ -757,6 +761,7 @@ fn 
leader_handle_move_leader() { let curp = { Arc::new(RawCurp::new_test(3, mock_role_change(), task_manager)) }; let membership = curp .generate_membership(Some(Change::Add(Node::new(1234, NodeMetadata::default())))) + .unwrap() .pop() .unwrap(); let _ignore = curp.update_membership_state(None, Some((1, membership)), None); diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index c30536db0..f20381146 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -517,6 +517,51 @@ async fn propose_remove_leader_should_success() { assert_ne!(id, new_id); } +#[tokio::test(flavor = "multi_thread")] +#[abort_on_panic] +async fn change_membership_should_be_idempotent() { + init_logger(); + + let group = CurpGroup::new(3).await; + let client = group.new_client().await; + + let node_meta = NodeMetadata::new("new_node", ["addr"], ["addr"]); + let node_id = 5; + let node = Node::new(node_id, node_meta.clone()); + + for _ in 0..2 { + client + .change_membership(vec![Change::Add(node.clone())]) + .await + .unwrap(); + } + assert_cluster(&client, 4, 3, [NodeAssert::new(node_id, node_meta, false)]).await; + + for _ in 0..2 { + client + .change_membership(vec![Change::Promote(node_id)]) + .await + .unwrap(); + } + assert_cluster(&client, 4, 4, []).await; + + for _ in 0..2 { + client + .change_membership(vec![Change::Demote(node_id)]) + .await + .unwrap(); + } + assert_cluster(&client, 4, 3, []).await; + + for _ in 0..2 { + client + .change_membership(vec![Change::Remove(node_id)]) + .await + .unwrap(); + } + assert_cluster(&client, 3, 3, []).await; +} + #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] async fn shutdown_rpc_should_shutdown_the_cluster_when_client_has_wrong_leader() { From 8ccd5c51e84bc7f120cf0295bc6b610ec90f60a0 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 28 Oct 2024 10:07:26 +0800 Subject: [PATCH 294/322] refactor: add cluster version to `ChangeMembershipRequest` 
Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/common | 2 +- crates/curp/src/client/unary/mod.rs | 6 +++++- crates/curp/src/server/curp_node/member_impl.rs | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 87fbd7fd2..a887141b1 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 87fbd7fd29335e0799017dde55b93e71dc69da8a +Subproject commit a887141b18371357ad811b4a9bcae584e26e2f01 diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 282ba37e7..5eef499c4 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -120,7 +120,11 @@ impl RepeatableClientApi for Unary { .into_iter() .map(|c| MembershipChange { change: Some(c) }) .collect(); - let req = ChangeMembershipRequest { changes }; + let cluster_version = ctx.cluster_state().cluster_version(); + let req = ChangeMembershipRequest { + cluster_version, + changes, + }; let timeout = self.config.wait_synced_timeout(); let resp = ctx .cluster_state() diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 7d04ffbee..857cdf4bd 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -40,6 +40,8 @@ impl, RC: RoleChange> CurpNode { &self, request: ChangeMembershipRequest, ) -> Result { + self.curp.check_cluster_version(request.cluster_version)?; + let changes = request .changes .into_iter() From e1510b80f6827480a43a34e9b6dfd073c7ed093f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 28 Oct 2024 10:42:10 +0800 Subject: [PATCH 295/322] refactor: do not send membership request if changes are already applied Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 1 - 
crates/curp/src/client/connect.rs | 6 +++- crates/curp/src/client/retry.rs | 7 +++-- crates/curp/src/client/unary/mod.rs | 38 ++++++++++++++++++++----- 4 files changed, 41 insertions(+), 11 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index 01dedc9ba..f012e3e72 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -256,7 +256,6 @@ impl ClusterStateFull { } /// Returns the membership of the state - #[cfg(test)] pub(crate) fn membership(&self) -> &Membership { &self.membership } diff --git a/crates/curp/src/client/connect.rs b/crates/curp/src/client/connect.rs index fc446d5bc..12297ed4b 100644 --- a/crates/curp/src/client/connect.rs +++ b/crates/curp/src/client/connect.rs @@ -102,11 +102,15 @@ pub(crate) trait RepeatableClientApi { ) -> Result; /// Performs membership change + /// + /// # Returns + /// + /// Returns `None` if the membership already applied to the cluster async fn change_membership( &self, changes: Vec, ctx: Context, - ) -> Result; + ) -> Result, Self::Error>; /// Send wait learner of the give ids, returns a stream of updating response stream async fn wait_learner( diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 6a2f2e047..c5000e77b 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -478,8 +478,11 @@ where let resp = self .retry::<_, _>(|client, ctx| client.change_membership(changes.clone(), ctx)) .await?; - let cluster_state = Fetch::build_cluster_state_from_response(self.fetch.connect_to(), resp); - self.cluster_state.update_with(cluster_state); + if let Some(resp) = resp { + let cluster_state = + Fetch::build_cluster_state_from_response(self.fetch.connect_to(), resp); + self.cluster_state.update_with(cluster_state); + } Ok(()) } diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 5eef499c4..4f759d8d4 100644 --- 
a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -6,17 +6,20 @@ use std::{collections::BTreeSet, marker::PhantomData}; use async_trait::async_trait; use curp_external_api::cmd::Command; use futures::Stream; -use tracing::warn; +use tracing::{debug, warn}; use super::{ config::Config, connect::{ProposeResponse, RepeatableClientApi}, retry::Context, }; -use crate::rpc::{ - Change, ChangeMembershipRequest, CurpError, FetchReadStateRequest, MembershipChange, - MembershipResponse, MoveLeaderRequest, ReadState, ShutdownRequest, WaitLearnerRequest, - WaitLearnerResponse, +use crate::{ + member::Membership, + rpc::{ + Change, ChangeMembershipRequest, CurpError, FetchReadStateRequest, MembershipChange, + MembershipResponse, MoveLeaderRequest, ReadState, ShutdownRequest, WaitLearnerRequest, + WaitLearnerResponse, + }, }; /// The unary client @@ -115,7 +118,11 @@ impl RepeatableClientApi for Unary { &self, changes: Vec, ctx: Context, - ) -> Result { + ) -> Result, Self::Error> { + if Self::change_applied(ctx.cluster_state().membership(), &changes) { + debug!("membership already applied, skipping changes"); + return Ok(None); + } let changes = changes .into_iter() .map(|c| MembershipChange { change: Some(c) }) @@ -132,7 +139,7 @@ impl RepeatableClientApi for Unary { .await? .into_inner(); - Ok(resp) + Ok(Some(resp)) } /// Send wait learner of the give ids, returns a stream of updating response stream @@ -156,3 +163,20 @@ impl RepeatableClientApi for Unary { Ok(resp) } } + +impl Unary { + /// Check if the changes already applied to the cluster membership + /// + /// TODO: Currently we do not send any request if the changes are already satisfied. However, + /// this may lead to some semantic ambiguity. For example, the id of a `Change::Remove` might + /// be invalid, but we still assume it has completed. A better implementation might be send a + /// full membership state to the cluster. 
+ fn change_applied(membership: &Membership, changes: &[Change]) -> bool { + changes.iter().all(|change| match *change { + Change::Add(ref node) => membership.nodes.get(&node.node_id) == node.meta.as_ref(), + Change::Remove(id) => !membership.nodes.contains_key(&id), + Change::Promote(id) => membership.contains_member(id), + Change::Demote(id) => !membership.contains_member(id), + }) + } +} From 6255664ff6202aa9d285032b39e0ad4eb404903c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 3 Nov 2024 12:12:38 +0800 Subject: [PATCH 296/322] refactor: split curp client API * Split curp client API into repeatable part and non-repeatable part * Remove unused read state rpc Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/connect.rs | 45 +++--- crates/curp/src/client/retry.rs | 198 ++++++++++++++++++------ crates/curp/src/client/tests.rs | 2 +- crates/curp/src/client/unary/mod.rs | 79 ++++------ crates/curp/src/rpc/mod.rs | 32 ---- crates/curp/src/server/curp_node/mod.rs | 22 +-- crates/curp/src/server/mod.rs | 6 +- crates/curp/src/server/raw_curp/mod.rs | 22 --- 8 files changed, 211 insertions(+), 195 deletions(-) diff --git a/crates/curp/src/client/connect.rs b/crates/curp/src/client/connect.rs index 12297ed4b..4ea384bf9 100644 --- a/crates/curp/src/client/connect.rs +++ b/crates/curp/src/client/connect.rs @@ -6,7 +6,7 @@ use futures::Stream; use crate::{ members::ServerId, - rpc::{Change, MembershipResponse, ReadState, WaitLearnerResponse}, + rpc::{Change, MembershipResponse, WaitLearnerResponse}, }; use super::retry::Context; @@ -42,9 +42,6 @@ pub trait ClientApi { /// Send move leader request async fn move_leader(&self, node_id: ServerId) -> Result<(), Self::Error>; - /// Send fetch read state from leader - async fn fetch_read_state(&self, cmd: &Self::Cmd) -> Result; - /// Send fetch cluster requests to all servers (That's because initially, we didn't /// know who the leader is.) 
/// @@ -75,32 +72,12 @@ pub(crate) trait RepeatableClientApi { /// The client error type Error; - /// The command type - type Cmd: Command; - - /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered - /// requests (event the requests are commutative). - async fn propose( - &self, - cmd: &Self::Cmd, - token: Option<&String>, - use_fast_path: bool, - ctx: Context, - ) -> Result, Self::Error>; - /// Send propose to shutdown cluster async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error>; /// Send move leader request async fn move_leader(&self, node_id: u64, ctx: Context) -> Result<(), Self::Error>; - /// Send fetch read state from leader - async fn fetch_read_state( - &self, - cmd: &Self::Cmd, - ctx: Context, - ) -> Result; - /// Performs membership change /// /// # Returns @@ -122,3 +99,23 @@ pub(crate) trait RepeatableClientApi { Self::Error, >; } + +/// A trait for non-idempotent operations, clients with this trait will NOT be able to retry. +#[async_trait] +pub(crate) trait NonRepeatableClientApi { + /// The client error + type Error; + + /// The command type + type Cmd: Command; + + /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered + /// requests (event the requests are commutative). 
+ async fn propose( + &self, + cmd: &Self::Cmd, + token: Option<&String>, + use_fast_path: bool, + ctx: Context, + ) -> Result, Self::Error>; +} diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index c5000e77b..798aa1274 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -1,3 +1,5 @@ +#![allow(clippy::same_name_method)] // TODO: use another name + use std::{ collections::BTreeSet, ops::SubAssign, @@ -6,6 +8,7 @@ use std::{ }; use async_trait::async_trait; +use curp_external_api::cmd::Command; use futures::{Future, Stream}; use parking_lot::RwLock; use tracing::{debug, warn}; @@ -13,7 +16,7 @@ use tracing::{debug, warn}; use super::{ cluster_state::{ClusterState, ClusterStateFull, ClusterStateInit}, config::Config, - connect::{ProposeResponse, RepeatableClientApi}, + connect::{NonRepeatableClientApi, ProposeResponse, RepeatableClientApi}, fetch::Fetch, keep_alive::{KeepAlive, KeepAliveHandle}, ClientApi, ProposeIdGuard, @@ -268,6 +271,39 @@ pub(super) struct Retry { tracker: CmdTracker, } +impl Retry { + /// Gets the context required for unary requests + async fn get_context(&self) -> Result { + let cluster_state = self.cluster_state.ready_or_fetch().await?; + // TODO: gen propose id + Ok(Context::new(ProposeId::default(), 0, cluster_state)) + } + + /// Updates the cluster state when error occurs. 
+ fn update_cluster_state_on_error(&self, err: &CurpError) { + match *err { + // Some error that needs to update cluster state + CurpError::RpcTransport(()) + | CurpError::WrongClusterVersion(()) + | CurpError::Redirect(_) // FIXME: The redirect error needs to include full cluster state + | CurpError::Zombie(()) => { + self.cluster_state.errored(); + } + CurpError::KeyConflict(()) + | CurpError::Duplicated(()) + | CurpError::ExpiredClientId(()) + | CurpError::InvalidConfig(()) + | CurpError::NodeNotExists(()) + | CurpError::NodeAlreadyExists(()) + | CurpError::LearnerNotCatchUp(()) + | CurpError::ShuttingDown(()) + | CurpError::Internal(_) + | CurpError::LeaderTransfer(_) + | CurpError::InvalidMemberChange(()) => {}, + } + } +} + impl Retry where Api: RepeatableClientApi + Send + Sync + 'static, @@ -329,15 +365,13 @@ where let propose_id_guard = self.tracker.gen_propose_id(client_id); let first_incomplete = self.tracker.first_incomplete(); while let Some(delay) = backoff.next_delay() { - let fetch_result = self.cluster_state.ready_or_fetch().await; - let cluster_state = match fetch_result { + let context = match self.get_context().await { Ok(x) => x, Err(err) => { self.on_error(err, delay, &mut last_err).await?; continue; } }; - let context = Context::new(*propose_id_guard, first_incomplete, cluster_state.clone()); let result = f(&self.inner, context).await; match result { Ok(res) => return Ok(res), @@ -358,7 +392,8 @@ where delay: Duration, last_err: &mut Option, ) -> Result<(), tonic::Status> { - self.handle_err(&err)?; + Self::early_return(&err)?; + self.update_cluster_state_on_error(&err); #[cfg(feature = "client-metrics")] super::metrics::get().client_retry_count.add(1, &[]); @@ -374,7 +409,7 @@ where } /// Handles errors before another retry - fn handle_err(&self, err: &CurpError) -> Result<(), tonic::Status> { + fn early_return(err: &CurpError) -> Result<(), tonic::Status> { match *err { // some errors that should not retry CurpError::Duplicated(()) @@ 
-383,8 +418,7 @@ where | CurpError::NodeNotExists(()) | CurpError::NodeAlreadyExists(()) | CurpError::LearnerNotCatchUp(()) - | CurpError::InvalidMemberChange(()) - => { + | CurpError::InvalidMemberChange(()) => { return Err(tonic::Status::from(err.clone())); } @@ -392,15 +426,11 @@ where CurpError::ExpiredClientId(()) | CurpError::KeyConflict(()) | CurpError::Internal(_) - | CurpError::LeaderTransfer(_) => {} - - // Some error that needs to update cluster state - CurpError::RpcTransport(()) + | CurpError::LeaderTransfer(_) + | CurpError::RpcTransport(()) | CurpError::WrongClusterVersion(()) - | CurpError::Redirect(_) // FIXME: The redirect error needs to include full cluster state - | CurpError::Zombie(()) => { - self.cluster_state.errored(); - } + | CurpError::Redirect(_) + | CurpError::Zombie(()) => {} } Ok(()) @@ -413,31 +443,31 @@ where } } -#[async_trait] -impl ClientApi for Retry +impl Retry where - Api: RepeatableClientApi + Send + Sync + 'static, + Api: NonRepeatableClientApi + Send + Sync + 'static, { - /// The client error - type Error = tonic::Status; - - /// Inherit the command type - type Cmd = Api::Cmd; - - /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered - /// requests (event the requests are commutative). - async fn propose( - &self, - cmd: &Self::Cmd, - token: Option<&String>, - use_fast_path: bool, - ) -> Result, tonic::Status> { - self.retry::<_, _>(|client, ctx| async move { - RepeatableClientApi::propose(client, cmd, token, use_fast_path, ctx).await + /// Takes a function f and run once. 
+ async fn once<'a, R, F>(&'a self, f: impl Fn(&'a Api, Context) -> F) -> Result + where + F: Future>, + { + let ctx = self.get_context().await.map_err(|err| { + self.update_cluster_state_on_error(&err); + err + })?; + + f(&self.inner, ctx).await.map_err(|err| { + self.update_cluster_state_on_error(&err); + err.into() }) - .await } +} +impl Retry +where + Api: RepeatableClientApi + Send + Sync + 'static, +{ /// Send propose to shutdown cluster async fn propose_shutdown(&self) -> Result<(), tonic::Status> { self.retry::<_, _>(|client, ctx| async move { @@ -447,17 +477,11 @@ where } /// Send move leader request - async fn move_leader(&self, node_id: u64) -> Result<(), Self::Error> { + async fn move_leader(&self, node_id: u64) -> Result<(), tonic::Status> { self.retry::<_, _>(|client, ctx| client.move_leader(node_id, ctx)) .await } - /// Send fetch read state from leader - async fn fetch_read_state(&self, cmd: &Self::Cmd) -> Result { - self.retry::<_, _>(|client, ctx| client.fetch_read_state(cmd, ctx)) - .await - } - /// Send fetch cluster requests to all servers (That's because initially, we didn't /// know who the leader is.) /// @@ -474,7 +498,7 @@ where } /// Performs membership change - async fn change_membership(&self, changes: Vec) -> Result<(), Self::Error> { + async fn change_membership(&self, changes: Vec) -> Result<(), tonic::Status> { let resp = self .retry::<_, _>(|client, ctx| client.change_membership(changes.clone(), ctx)) .await?; @@ -491,13 +515,95 @@ where async fn wait_learner( &self, node_ids: BTreeSet, - ) -> Result> + Send>, Self::Error> - { + ) -> Result< + Box> + Send>, + tonic::Status, + > { self.retry::<_, _>(|client, ctx| client.wait_learner(node_ids.clone(), ctx)) .await } } +impl Retry +where + C: Command, + Api: NonRepeatableClientApi + Send + Sync + 'static, +{ + /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered + /// requests (event the requests are commutative). 
+ async fn propose( + &self, + cmd: &C, + token: Option<&String>, + use_fast_path: bool, + ) -> Result, tonic::Status> { + self.once::<_, _>(|client, ctx| async move { + NonRepeatableClientApi::propose(client, cmd, token, use_fast_path, ctx).await + }) + .await + } +} + +#[async_trait] +impl ClientApi for Retry +where + C: Command, + Api: NonRepeatableClientApi + + RepeatableClientApi + + Send + + Sync + + 'static, +{ + /// The client error + type Error = tonic::Status; + + /// The command type + type Cmd = C; + + /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered + /// requests (event the requests are commutative). + async fn propose( + &self, + cmd: &Self::Cmd, + token: Option<&String>, // TODO: Allow external custom interceptors, do not pass token in parameters + use_fast_path: bool, + ) -> Result, Self::Error> { + self.propose(cmd, token, use_fast_path).await + } + + /// Send propose to shutdown cluster + async fn propose_shutdown(&self) -> Result<(), Self::Error> { + self.propose_shutdown().await + } + + /// Send move leader request + async fn move_leader(&self, node_id: ServerId) -> Result<(), Self::Error> { + self.move_leader(node_id).await + } + + /// Send fetch cluster requests to all servers (That's because initially, we didn't + /// know who the leader is.) 
+ /// + /// Note: The fetched cluster may still be outdated if `linearizable` is false + async fn fetch_cluster(&self, linearizable: bool) -> Result { + self.fetch_cluster(linearizable).await + } + + /// Performs membership change + async fn change_membership(&self, changes: Vec) -> Result<(), Self::Error> { + self.change_membership(changes).await + } + + /// Send wait learner of the give ids, returns a stream of updating response stream + async fn wait_learner( + &self, + node_ids: BTreeSet, + ) -> Result> + Send>, Self::Error> + { + self.wait_learner(node_ids).await + } +} + /// Tests for backoff #[cfg(test)] mod tests { diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 0e25c146a..de9e756d6 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -15,7 +15,7 @@ use super::{cluster_state::ClusterState, config::Config, unary::Unary}; use crate::{ client::{ cluster_state::ClusterStateFull, - connect::RepeatableClientApi, + connect::NonRepeatableClientApi, fetch::Fetch, keep_alive::KeepAlive, retry::{Context, Retry, RetryConfig}, diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 4f759d8d4..90e092c25 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -6,19 +6,18 @@ use std::{collections::BTreeSet, marker::PhantomData}; use async_trait::async_trait; use curp_external_api::cmd::Command; use futures::Stream; -use tracing::{debug, warn}; +use tracing::debug; use super::{ config::Config, - connect::{ProposeResponse, RepeatableClientApi}, + connect::{NonRepeatableClientApi, ProposeResponse, RepeatableClientApi}, retry::Context, }; use crate::{ member::Membership, rpc::{ - Change, ChangeMembershipRequest, CurpError, FetchReadStateRequest, MembershipChange, - MembershipResponse, MoveLeaderRequest, ReadState, ShutdownRequest, WaitLearnerRequest, - WaitLearnerResponse, + Change, ChangeMembershipRequest, CurpError, 
MembershipChange, MembershipResponse, + MoveLeaderRequest, ShutdownRequest, WaitLearnerRequest, WaitLearnerResponse, }, }; @@ -46,26 +45,6 @@ impl RepeatableClientApi for Unary { /// The error is generated from server type Error = CurpError; - /// The command type - type Cmd = C; - - /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered - /// requests (event the requests are commutative). - async fn propose( - &self, - cmd: &Self::Cmd, - token: Option<&String>, - use_fast_path: bool, - ctx: Context, - ) -> Result, Self::Error> { - if cmd.is_read_only() { - self.propose_read_only(cmd, token, use_fast_path, &ctx) - .await - } else { - self.propose_mutative(cmd, token, use_fast_path, &ctx).await - } - } - /// Send propose to shutdown cluster async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error> { let req = ShutdownRequest::new(ctx.propose_id(), 0); @@ -90,30 +69,6 @@ impl RepeatableClientApi for Unary { Ok(()) } - /// Send fetch read state from leader - async fn fetch_read_state( - &self, - cmd: &Self::Cmd, - ctx: Context, - ) -> Result { - // Same as fast_round, we blame the serializing error to the server even - // thought it is the local error - let req = FetchReadStateRequest::new(cmd, 0).map_err(|ser_err| { - warn!("serializing error: {ser_err}"); - CurpError::from(ser_err) - })?; - let timeout = self.config.wait_synced_timeout(); - let state = ctx - .cluster_state() - .map_leader(|conn| async move { conn.fetch_read_state(req, timeout).await }) - .await? 
- .into_inner() - .read_state - .unwrap_or_else(|| unreachable!("read_state must be set in fetch read state response")); - - Ok(state) - } - async fn change_membership( &self, changes: Vec, @@ -164,6 +119,32 @@ impl RepeatableClientApi for Unary { } } +#[async_trait] +impl NonRepeatableClientApi for Unary { + /// The error is generated from server + type Error = CurpError; + + /// The command type + type Cmd = C; + + /// Send propose to the whole cluster, `use_fast_path` set to `false` to fallback into ordered + /// requests (event the requests are commutative). + async fn propose( + &self, + cmd: &Self::Cmd, + token: Option<&String>, + use_fast_path: bool, + ctx: Context, + ) -> Result, Self::Error> { + if cmd.is_read_only() { + self.propose_read_only(cmd, token, use_fast_path, &ctx) + .await + } else { + self.propose_mutative(cmd, token, use_fast_path, &ctx).await + } + } +} + impl Unary { /// Check if the changes already applied to the cluster membership /// diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index ef621ea57..7b9cc1537 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use curp_external_api::{ cmd::{ConflictCheck, PbCodec, PbSerializeError}, conflict::EntryId, - InflightId, }; use prost::Message; use serde::{Deserialize, Serialize}; @@ -374,37 +373,6 @@ impl InstallSnapshotResponse { } } -impl IdSet { - /// Create a new `IdSet` - pub(crate) fn new(inflight_ids: Vec) -> Self { - Self { inflight_ids } - } -} - -impl FetchReadStateRequest { - /// Create a new fetch read state request - pub(crate) fn new(cmd: &C, cluster_version: u64) -> bincode::Result { - Ok(Self { - command: bincode::serialize(cmd)?, - cluster_version, - }) - } - - /// Get command - pub(crate) fn cmd(&self) -> bincode::Result { - bincode::deserialize(&self.command) - } -} - -impl FetchReadStateResponse { - /// Create a new fetch read state response - pub(crate) fn new(state: ReadState) -> Self { - Self { - 
read_state: Some(state), - } - } -} - impl ShutdownRequest { /// Create a new shutdown request pub(crate) fn new(id: ProposeId, cluster_version: u64) -> Self { diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 01fecfa1a..c1be1bac4 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -48,12 +48,11 @@ use crate::{ role_change::RoleChange, rpc::{ self, AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchMembershipRequest, - FetchReadStateRequest, FetchReadStateResponse, InstallSnapshotRequest, - InstallSnapshotResponse, LeaseKeepAliveMsg, MembershipResponse, MoveLeaderRequest, - MoveLeaderResponse, PoolEntry, ProposeId, ProposeRequest, ProposeResponse, - ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, - SyncedResponse, TriggerShutdownRequest, TriggerShutdownResponse, TryBecomeLeaderNowRequest, - TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, + InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, MembershipResponse, + MoveLeaderRequest, MoveLeaderResponse, PoolEntry, ProposeId, ProposeRequest, + ProposeResponse, ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, + ShutdownResponse, SyncedResponse, TriggerShutdownRequest, TriggerShutdownResponse, + TryBecomeLeaderNowRequest, TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, }, server::{ cmd_worker::{after_sync, worker_reset, worker_snapshot}, @@ -594,17 +593,6 @@ impl, RC: RoleChange> CurpNode { )) } - /// Handle `FetchReadState` requests - #[allow(clippy::needless_pass_by_value)] // To keep type consistent with other request handlers - pub(super) fn fetch_read_state( - &self, - req: FetchReadStateRequest, - ) -> Result { - let cmd = req.cmd()?; - let state = self.curp.handle_fetch_read_state(Arc::new(cmd)); - Ok(FetchReadStateResponse::new(state)) - } - /// Handle `MoveLeader` requests pub(super) async fn move_leader( &self, diff 
--git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 6de1f6dc0..280be270f 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -154,11 +154,9 @@ impl, RC: RoleChange> crate::rpc::Protocol fo #[instrument(skip_all, name = "curp_fetch_read_state")] async fn fetch_read_state( &self, - request: tonic::Request, + _request: tonic::Request, ) -> Result, tonic::Status> { - Ok(tonic::Response::new( - self.inner.fetch_read_state(request.into_inner())?, - )) + Err(tonic::Status::unimplemented("unimplemented")) } #[instrument(skip_all, name = "curp_move_leader")] diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 1f5418a93..676bb8fdb 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -73,10 +73,8 @@ use crate::role_change::RoleChange; use crate::rpc::connect::InnerConnectApi; use crate::rpc::connect::InnerConnectApiWrapper; use crate::rpc::CurpError; -use crate::rpc::IdSet; use crate::rpc::PoolEntry; use crate::rpc::ProposeId; -use crate::rpc::ReadState; use crate::rpc::Redirect; use crate::server::cmd_board::CmdBoardRef; use crate::server::metrics; @@ -1100,26 +1098,6 @@ impl RawCurp { validate } - /// Handle `fetch_read_state` - pub(super) fn handle_fetch_read_state(&self, cmd: Arc) -> ReadState { - let ids: Vec<_> = self - .ctx - .uncommitted_pool - .map_lock(|ucp| ucp.all_conflict(&PoolEntry::new(ProposeId::default(), cmd))) - .into_iter() - .map(|entry| entry.id) - .collect(); - if ids.is_empty() { - ReadState::CommitIndex(self.log.read().commit_index) - } else { - ReadState::Ids(IdSet::new( - ids.into_iter() - .map(crate::log_entry::propose_id_to_inflight_id) - .collect(), - )) - } - } - /// Handle `move_leader` pub(super) fn handle_move_leader(&self, target_id: ServerId) -> Result { debug!("{} received move leader to {}", self.id(), target_id); From 74ef8b58a5e20cd53c149fb12fb6cf40ef035db3 Mon Sep 17 00:00:00 2001 From: 
bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 3 Nov 2024 17:12:33 +0800 Subject: [PATCH 297/322] refactor: remove curp client keep alive Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- .../curp/src/client/{ => dedup_impl}/keep_alive.rs | 0 crates/curp/src/client/dedup_impl/mod.rs | 1 + crates/curp/src/client/mod.rs | 13 ++++--------- crates/curp/src/client/retry.rs | 9 ++------- crates/curp/src/client/tests.rs | 5 ----- 5 files changed, 7 insertions(+), 21 deletions(-) rename crates/curp/src/client/{ => dedup_impl}/keep_alive.rs (100%) create mode 100644 crates/curp/src/client/dedup_impl/mod.rs diff --git a/crates/curp/src/client/keep_alive.rs b/crates/curp/src/client/dedup_impl/keep_alive.rs similarity index 100% rename from crates/curp/src/client/keep_alive.rs rename to crates/curp/src/client/dedup_impl/keep_alive.rs diff --git a/crates/curp/src/client/dedup_impl/mod.rs b/crates/curp/src/client/dedup_impl/mod.rs new file mode 100644 index 000000000..304e2e7a7 --- /dev/null +++ b/crates/curp/src/client/dedup_impl/mod.rs @@ -0,0 +1 @@ +mod keep_alive; diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index 77468ec69..ec75c85ff 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -24,10 +24,6 @@ mod fetch; /// Config of the client mod config; -#[allow(unused)] -/// Lease keep alive implementation -mod keep_alive; - /// Connect APIs mod connect; @@ -35,6 +31,10 @@ mod connect; #[cfg(test)] mod tests; +/// Deprecate dedup implementation +#[cfg(ignore)] +mod dedup_impl; + #[allow(clippy::module_name_repetitions)] // More conprehensive than just `Api` pub use connect::ClientApi; @@ -55,7 +55,6 @@ use self::{ cluster_state::{ClusterState, ClusterStateInit}, config::Config, fetch::{ConnectToCluster, Fetch}, - keep_alive::KeepAlive, retry::{Retry, RetryConfig}, unary::Unary, }; @@ -310,7 +309,6 @@ impl ClientBuilder { ) -> Result + Send + Sync + 'static, tonic::Status> { let config = 
self.init_config(None); - let keep_alive = KeepAlive::new(*self.config.keep_alive_interval()); let fetch = Fetch::new( *self.config.wait_synced_timeout(), self.build_connect_to(None), @@ -319,7 +317,6 @@ impl ClientBuilder { let client = Retry::new( Unary::new(config), self.init_retry_config(), - keep_alive, fetch, cluster_state, ); @@ -380,7 +377,6 @@ impl ClientBuilderWithBypass

{ ) -> Result, tonic::Status> { let bypassed = Self::bypassed_connect(self.local_server_id, self.local_server); let config = self.inner.init_config(Some(self.local_server_id)); - let keep_alive = KeepAlive::new(*self.inner.config.keep_alive_interval()); let fetch = Fetch::new( *self.inner.config.wait_synced_timeout(), self.inner.build_connect_to(Some(bypassed)), @@ -389,7 +385,6 @@ impl ClientBuilderWithBypass

{ let client = Retry::new( Unary::new(config), self.inner.init_retry_config(), - keep_alive, fetch, cluster_state, ); diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 798aa1274..8c8206590 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -18,7 +18,6 @@ use super::{ config::Config, connect::{NonRepeatableClientApi, ProposeResponse, RepeatableClientApi}, fetch::Fetch, - keep_alive::{KeepAlive, KeepAliveHandle}, ClientApi, ProposeIdGuard, }; use crate::{ @@ -263,8 +262,6 @@ pub(super) struct Retry { retry_config: RetryConfig, /// Cluster state cluster_state: Arc, - /// Keep alive client - keep_alive: KeepAliveHandle, /// Fetch cluster object fetch: Fetch, /// Command tracker @@ -312,17 +309,14 @@ where pub(super) fn new( inner: Api, retry_config: RetryConfig, - keep_alive: KeepAlive, fetch: Fetch, cluster_state: ClusterState, ) -> Self { let cluster_state = Arc::new(ClusterStateShared::new(cluster_state, fetch.clone())); - let keep_alive_handle = keep_alive.spawn_keep_alive(Arc::clone(&cluster_state)); Self { inner, retry_config, cluster_state, - keep_alive: keep_alive_handle, fetch, tracker: CmdTracker::default(), } @@ -361,7 +355,8 @@ where { let mut backoff = self.retry_config.init_backoff(); let mut last_err = None; - let client_id = self.keep_alive.client_id().await; + // TODO: generate client id + let client_id = 0; let propose_id_guard = self.tracker.gen_propose_id(client_id); let first_incomplete = self.tracker.first_incomplete(); while let Some(delay) = backoff.next_delay() { diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index de9e756d6..bcab76645 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -17,7 +17,6 @@ use crate::{ cluster_state::ClusterStateFull, connect::NonRepeatableClientApi, fetch::Fetch, - keep_alive::KeepAlive, retry::{Context, Retry, RetryConfig}, ClientApi, }, @@ -321,7 +320,6 @@ async fn 
test_retry_propose_return_no_retry_error() { let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(100), 5), - KeepAlive::new(Duration::from_secs(1)), Fetch::new_disable(), ClusterState::Full(cluster_state), ); @@ -363,7 +361,6 @@ async fn test_retry_propose_return_retry_error() { let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(10), 5), - KeepAlive::new(Duration::from_secs(1)), Fetch::new(Duration::from_secs(1), move |_| connects.clone()), ClusterState::Full(cluster_state), ); @@ -391,7 +388,6 @@ async fn test_retry_will_update_state_on_error() { let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(10), 5), - KeepAlive::new(Duration::from_secs(1)), Fetch::new(Duration::from_secs(1), move |_| connects.clone()), ClusterState::Full(cluster_state), ); @@ -426,7 +422,6 @@ async fn test_retry_will_update_state_on_change_membership() { let retry = Retry::new( unary, RetryConfig::new_fixed(Duration::from_millis(10), 5), - KeepAlive::new(Duration::from_secs(1)), Fetch::new_disable(), ClusterState::Full(cluster_state), ); From 98161f91bc81fdc86b40f9943487ae3f1d2d2f39 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 3 Nov 2024 17:22:59 +0800 Subject: [PATCH 298/322] refactor: remove curp client cmd id tracker Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/dedup_impl/mod.rs | 2 + .../curp/src/client/dedup_impl/propose_id.rs | 59 +++++++++++++++++++ crates/curp/src/client/mod.rs | 38 +----------- crates/curp/src/client/retry.rs | 49 +++------------ crates/curp/src/client/unary/propose_impl.rs | 10 +--- crates/curp/src/rpc/mod.rs | 3 +- 6 files changed, 73 insertions(+), 88 deletions(-) create mode 100644 crates/curp/src/client/dedup_impl/propose_id.rs diff --git a/crates/curp/src/client/dedup_impl/mod.rs b/crates/curp/src/client/dedup_impl/mod.rs index 304e2e7a7..31f969d6c 100644 --- 
a/crates/curp/src/client/dedup_impl/mod.rs +++ b/crates/curp/src/client/dedup_impl/mod.rs @@ -1 +1,3 @@ mod keep_alive; + +mod propose_id; diff --git a/crates/curp/src/client/dedup_impl/propose_id.rs b/crates/curp/src/client/dedup_impl/propose_id.rs new file mode 100644 index 000000000..f05e5e90a --- /dev/null +++ b/crates/curp/src/client/dedup_impl/propose_id.rs @@ -0,0 +1,59 @@ +/// Propose id guard, used to ensure the sequence of propose id is recorded. +struct ProposeIdGuard<'a> { + /// The propose id + propose_id: ProposeId, + /// The tracker + tracker: &'a RwLock, +} + +impl Deref for ProposeIdGuard<'_> { + type Target = ProposeId; + + fn deref(&self) -> &Self::Target { + &self.propose_id + } +} + +impl<'a> ProposeIdGuard<'a> { + /// Create a new propose id guard + fn new(tracker: &'a RwLock, propose_id: ProposeId) -> Self { + Self { + propose_id, + tracker, + } + } +} + +impl Drop for ProposeIdGuard<'_> { + fn drop(&mut self) { + let _ig = self.tracker.write().record(self.propose_id.1); + } +} + +/// Command tracker +#[derive(Debug, Default)] +struct CmdTracker { + /// Last sent sequence number + last_sent_seq: AtomicU64, + /// Request tracker + tracker: RwLock, +} + +impl CmdTracker { + /// New a seq num and record it + fn new_seq_num(&self) -> u64 { + self.last_sent_seq + .fetch_add(1, std::sync::atomic::Ordering::Relaxed) + } + + /// Generate a unique propose id during the retry process. + fn gen_propose_id(&self, client_id: u64) -> ProposeIdGuard<'_> { + let seq_num = self.new_seq_num(); + ProposeIdGuard::new(&self.tracker, ProposeId(client_id, seq_num)) + } + + /// Generate a unique propose id during the retry process. 
+ fn first_incomplete(&self) -> u64 { + self.tracker.read().first_incomplete() + } +} diff --git a/crates/curp/src/client/mod.rs b/crates/curp/src/client/mod.rs index ec75c85ff..55a7fd737 100644 --- a/crates/curp/src/client/mod.rs +++ b/crates/curp/src/client/mod.rs @@ -40,10 +40,9 @@ pub use connect::ClientApi; #[cfg(madsim)] use std::sync::atomic::AtomicU64; -use std::{collections::HashMap, ops::Deref, sync::Arc}; +use std::{collections::HashMap, sync::Arc}; use curp_external_api::cmd::Command; -use parking_lot::RwLock; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; use tracing::debug; @@ -64,44 +63,11 @@ use crate::{ rpc::{ self, connect::{BypassedConnect, ConnectApi}, - MembershipResponse, NodeMetadata, ProposeId, Protocol, + MembershipResponse, NodeMetadata, Protocol, }, server::StreamingProtocol, - tracker::Tracker, }; -/// Propose id guard, used to ensure the sequence of propose id is recorded. -struct ProposeIdGuard<'a> { - /// The propose id - propose_id: ProposeId, - /// The tracker - tracker: &'a RwLock, -} - -impl Deref for ProposeIdGuard<'_> { - type Target = ProposeId; - - fn deref(&self) -> &Self::Target { - &self.propose_id - } -} - -impl<'a> ProposeIdGuard<'a> { - /// Create a new propose id guard - fn new(tracker: &'a RwLock, propose_id: ProposeId) -> Self { - Self { - propose_id, - tracker, - } - } -} - -impl Drop for ProposeIdGuard<'_> { - fn drop(&mut self) { - let _ig = self.tracker.write().record(self.propose_id.1); - } -} - /// Sets the initial cluster for the client builder #[derive(Debug, Clone)] enum SetCluster { diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 8c8206590..dd0306c03 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -18,7 +18,7 @@ use super::{ config::Config, connect::{NonRepeatableClientApi, ProposeResponse, RepeatableClientApi}, fetch::Fetch, - ClientApi, ProposeIdGuard, + ClientApi, }; use crate::{ members::ServerId, @@ -143,45 +143,12 @@ 
impl Context { self.propose_id } - /// Returns the first incomplete sequence number - pub(crate) fn first_incomplete(&self) -> u64 { - self.first_incomplete - } - /// Returns the current client id pub(crate) fn cluster_state(&self) -> ClusterStateFull { self.cluster_state.clone() } } -/// Command tracker -#[derive(Debug, Default)] -struct CmdTracker { - /// Last sent sequence number - last_sent_seq: AtomicU64, - /// Request tracker - tracker: RwLock, -} - -impl CmdTracker { - /// New a seq num and record it - fn new_seq_num(&self) -> u64 { - self.last_sent_seq - .fetch_add(1, std::sync::atomic::Ordering::Relaxed) - } - - /// Generate a unique propose id during the retry process. - fn gen_propose_id(&self, client_id: u64) -> ProposeIdGuard<'_> { - let seq_num = self.new_seq_num(); - ProposeIdGuard::new(&self.tracker, ProposeId(client_id, seq_num)) - } - - /// Generate a unique propose id during the retry process. - fn first_incomplete(&self) -> u64 { - self.tracker.read().first_incomplete() - } -} - /// A shared cluster state #[derive(Debug)] pub(crate) struct ClusterStateShared { @@ -264,16 +231,17 @@ pub(super) struct Retry { cluster_state: Arc, /// Fetch cluster object fetch: Fetch, - /// Command tracker - tracker: CmdTracker, + /// The client id + client_id: u64, } impl Retry { /// Gets the context required for unary requests async fn get_context(&self) -> Result { + let propose_id = ProposeId(self.client_id, rand::random()); let cluster_state = self.cluster_state.ready_or_fetch().await?; // TODO: gen propose id - Ok(Context::new(ProposeId::default(), 0, cluster_state)) + Ok(Context::new(propose_id, 0, cluster_state)) } /// Updates the cluster state when error occurs. 
@@ -312,13 +280,14 @@ where fetch: Fetch, cluster_state: ClusterState, ) -> Self { + let client_id: u64 = rand::random(); let cluster_state = Arc::new(ClusterStateShared::new(cluster_state, fetch.clone())); Self { inner, retry_config, cluster_state, fetch, - tracker: CmdTracker::default(), + client_id, } } @@ -355,10 +324,6 @@ where { let mut backoff = self.retry_config.init_backoff(); let mut last_err = None; - // TODO: generate client id - let client_id = 0; - let propose_id_guard = self.tracker.gen_propose_id(client_id); - let first_incomplete = self.tracker.first_incomplete(); while let Some(delay) = backoff.next_delay() { let context = match self.get_context().await { Ok(x) => x, diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs index 159f1d69f..37251355f 100644 --- a/crates/curp/src/client/unary/propose_impl.rs +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -137,14 +137,8 @@ impl Unary { ) -> EventStream<'_, C> { let term = ctx.cluster_state().term(); let cluster_version = ctx.cluster_state().cluster_version(); - let propose_req = ProposeRequest::new::( - ctx.propose_id(), - cmd, - cluster_version, - term, - !use_fast_path, - ctx.first_incomplete(), - ); + let propose_req = + ProposeRequest::new::(ctx.propose_id(), cmd, cluster_version, term, !use_fast_path); let timeout = self.config.propose_timeout(); let token = token.cloned(); let stream = ctx diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 7b9cc1537..d97bca0d0 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -143,7 +143,6 @@ impl ProposeRequest { cluster_version: u64, term: u64, slow_path: bool, - first_incomplete: u64, ) -> Self { Self { propose_id: Some(propose_id.into()), @@ -151,7 +150,7 @@ impl ProposeRequest { cluster_version, term, slow_path, - first_incomplete, + first_incomplete: 0, } } From 4c7138793289b4a13e302ab754dbd5f01f2b7030 Mon Sep 17 00:00:00 2001 From: bsbds 
<69835502+bsbds@users.noreply.github.com> Date: Sun, 3 Nov 2024 17:37:12 +0800 Subject: [PATCH 299/322] refactor: remove curp server duplication check Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 24 +----------- crates/curp/src/server/mod.rs | 3 +- crates/curp/src/server/raw_curp/dedup.rs | 48 ++++++++++++++++++++++++ crates/curp/src/server/raw_curp/mod.rs | 44 ++-------------------- 4 files changed, 54 insertions(+), 65 deletions(-) create mode 100644 crates/curp/src/server/raw_curp/dedup.rs diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index c1be1bac4..d0d335c98 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -9,7 +9,6 @@ use clippy_utilities::NumericCast; use engine::{SnapshotAllocator, SnapshotApi}; use futures::{future::join_all, pin_mut, stream::FuturesUnordered, FutureExt, Stream, StreamExt}; use madsim::rand::{thread_rng, Rng}; -use opentelemetry::KeyValue; use parking_lot::{Mutex, RwLock}; use tokio::{sync::oneshot, time::MissedTickBehavior}; #[cfg(not(madsim))] @@ -143,7 +142,7 @@ pub(super) struct CurpNode, RC: RoleChange> { /// Handlers for clients impl, RC: RoleChange> CurpNode { /// Handle `ProposeStream` requests - pub(super) async fn propose_stream( + pub(super) fn propose_stream( &self, req: &ProposeRequest, resp_tx: Arc, @@ -166,27 +165,6 @@ impl, RC: RoleChange> CurpNode { self.curp.mark_client_id_bypassed(req.propose_id().0); } - match self - .curp - .deduplicate(req.propose_id(), Some(req.first_incomplete)) - { - // If the propose is duplicated, return the result directly - Err(CurpError::Duplicated(())) => { - let (er, asr) = - CommandBoard::wait_for_er_asr(&self.cmd_board, req.propose_id()).await; - resp_tx.send_propose(ProposeResponse::new_result::(&er, true)); - resp_tx.send_synced(SyncedResponse::new_result::(&asr)); - } - Err(CurpError::ExpiredClientId(())) => { - 
metrics::get() - .proposals_failed - .add(1, &[KeyValue::new("reason", "duplicated proposal")]); - return Err(CurpError::expired_client_id()); - } - Err(_) => unreachable!("deduplicate won't return other type of errors"), - Ok(()) => {} - } - let propose = Propose::try_new(req, resp_tx)?; let _ignore = self.propose_tx.send(propose); diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 280be270f..7dcabad7b 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -115,8 +115,7 @@ impl, RC: RoleChange> crate::rpc::Protocol fo let (tx, rx) = flume::bounded(2); let resp_tx = Arc::new(ResponseSender::new(tx)); self.inner - .propose_stream(&request.into_inner(), resp_tx, bypassed) - .await?; + .propose_stream(&request.into_inner(), resp_tx, bypassed)?; Ok(tonic::Response::new(rx.into_stream())) } diff --git a/crates/curp/src/server/raw_curp/dedup.rs b/crates/curp/src/server/raw_curp/dedup.rs new file mode 100644 index 000000000..0e9c188e9 --- /dev/null +++ b/crates/curp/src/server/raw_curp/dedup.rs @@ -0,0 +1,48 @@ +use curp_external_api::{cmd::Command, role_change::RoleChange}; + +use crate::{ + rpc::{CurpError, ProposeId}, + server::cmd_board::CommandBoard, +}; + +use super::RawCurp; + +impl RawCurp { + /// Process deduplication and acknowledge the `first_incomplete` for this + /// client id + pub(crate) fn deduplicate( + &self, + ProposeId(client_id, seq_num): ProposeId, + first_incomplete: Option, + ) -> Result<(), CurpError> { + // deduplication + if self.ctx.lm.read().check_alive(client_id) { + let mut cb_w = self.ctx.cb.write(); + let tracker = cb_w.tracker(client_id); + if tracker.only_record(seq_num) { + // TODO: obtain the previous ER from cmd_board and packed into + // CurpError::Duplicated as an entry. 
+ return Err(CurpError::duplicated()); + } + if let Some(first_incomplete) = first_incomplete { + let before = tracker.first_incomplete(); + if tracker.must_advance_to(first_incomplete) { + for seq_num_ack in before..first_incomplete { + Self::ack(ProposeId(client_id, seq_num_ack), &mut cb_w); + } + } + } + } else { + self.ctx.cb.write().client_expired(client_id); + return Err(CurpError::expired_client_id()); + } + Ok(()) + } + + /// Acknowledge the propose id and GC it's cmd board result + fn ack(id: ProposeId, cb: &mut CommandBoard) { + let _ignore_er = cb.er_buffer.swap_remove(&id); + let _ignore_asr = cb.asr_buffer.swap_remove(&id); + let _ignore_conf = cb.conf_buffer.swap_remove(&id); + } +} diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 676bb8fdb..5527f8f19 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -53,7 +53,6 @@ use self::node_state::NodeStates; use self::state::CandidateState; use self::state::LeaderState; use self::state::State; -use super::cmd_board::CommandBoard; use super::conflict::spec_pool_new::SpeculativePool; use super::conflict::uncommitted_pool::UncommittedPool; use super::curp_node::TaskType; @@ -104,6 +103,10 @@ mod monitor; /// Log replication implementation pub(crate) mod replication; +#[cfg(ignore)] +/// Dedup implementation +mod dedup; + /// The curp state machine pub struct RawCurp { /// Curp state @@ -719,7 +722,6 @@ impl RawCurp { if self.lst.get_transferee().is_some() { return Err(CurpError::LeaderTransfer("leader transferring".to_owned())); } - self.deduplicate(propose_id, None)?; let index = self.push_log_entry(propose_id, EntryData::Shutdown).index; debug!("{} gets new log[{index}]", self.id()); @@ -1708,44 +1710,6 @@ impl RawCurp { } } - /// Process deduplication and acknowledge the `first_incomplete` for this - /// client id - pub(crate) fn deduplicate( - &self, - ProposeId(client_id, seq_num): ProposeId, - 
first_incomplete: Option, - ) -> Result<(), CurpError> { - // deduplication - if self.ctx.lm.read().check_alive(client_id) { - let mut cb_w = self.ctx.cb.write(); - let tracker = cb_w.tracker(client_id); - if tracker.only_record(seq_num) { - // TODO: obtain the previous ER from cmd_board and packed into - // CurpError::Duplicated as an entry. - return Err(CurpError::duplicated()); - } - if let Some(first_incomplete) = first_incomplete { - let before = tracker.first_incomplete(); - if tracker.must_advance_to(first_incomplete) { - for seq_num_ack in before..first_incomplete { - Self::ack(ProposeId(client_id, seq_num_ack), &mut cb_w); - } - } - } - } else { - self.ctx.cb.write().client_expired(client_id); - return Err(CurpError::expired_client_id()); - } - Ok(()) - } - - /// Acknowledge the propose id and GC it's cmd board result - fn ack(id: ProposeId, cb: &mut CommandBoard) { - let _ignore_er = cb.er_buffer.swap_remove(&id); - let _ignore_asr = cb.asr_buffer.swap_remove(&id); - let _ignore_conf = cb.conf_buffer.swap_remove(&id); - } - /// Update match index, also updates the monitoring ids pub(crate) fn update_match_index(&self, id: u64, index: LogIndex) { self.ctx.node_states.update_match_index(id, index); From d0a20d80c2d1e0f77508405d2ba01b853e78cc3d Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 4 Nov 2024 15:57:48 +0800 Subject: [PATCH 300/322] refactor: remove curp server er and asr buffer Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/cmd_board.rs | 181 +---------------------- crates/curp/src/server/cmd_worker/mod.rs | 21 +-- crates/curp/src/server/curp_node/mod.rs | 2 +- crates/curp/src/server/gc.rs | 158 +------------------- crates/curp/src/server/raw_curp/mod.rs | 6 +- 5 files changed, 15 insertions(+), 353 deletions(-) diff --git a/crates/curp/src/server/cmd_board.rs b/crates/curp/src/server/cmd_board.rs index 64169323a..899e002d3 100644 --- 
a/crates/curp/src/server/cmd_board.rs +++ b/crates/curp/src/server/cmd_board.rs @@ -1,125 +1,34 @@ -#![allow(unused)] // TODO remove - use std::{collections::HashMap, sync::Arc}; use event_listener::{Event, EventListener}; -use indexmap::{IndexMap, IndexSet}; use parking_lot::RwLock; -use utils::parking_lot_lock::RwLockMap; -use crate::{cmd::Command, rpc::ProposeId, tracker::Tracker}; +use crate::tracker::Tracker; /// Ref to the cmd board -pub(super) type CmdBoardRef = Arc>>; +pub(super) type CmdBoardRef = Arc>; /// Command board is a buffer to track cmd states and store notifiers for requests that need to wait for a cmd #[derive(Debug)] -pub(super) struct CommandBoard { - /// Store all notifiers for execution results - er_notifiers: HashMap, - /// Store all notifiers for after sync results - asr_notifiers: HashMap, +pub(super) struct CommandBoard { /// Store the shutdown notifier shutdown_notifier: Event, - /// Store all notifiers for conf change results - conf_notifier: HashMap, /// The result trackers track all cmd, this is used for dedup pub(super) trackers: HashMap, - /// Store all conf change propose ids - pub(super) conf_buffer: IndexSet, - /// Store all execution results - pub(super) er_buffer: IndexMap>, - /// Store all after sync results - pub(super) asr_buffer: IndexMap>, } -impl CommandBoard { +impl CommandBoard { /// Create an empty command board pub(super) fn new() -> Self { Self { - er_notifiers: HashMap::new(), - asr_notifiers: HashMap::new(), shutdown_notifier: Event::new(), trackers: HashMap::new(), - er_buffer: IndexMap::new(), - asr_buffer: IndexMap::new(), - conf_notifier: HashMap::new(), - conf_buffer: IndexSet::new(), } } - /// Get the tracker for a client id - pub(super) fn tracker(&mut self, client_id: u64) -> &mut Tracker { - self.trackers.entry(client_id).or_default() - } - - /// Remove client result tracker from trackers if it is expired - pub(super) fn client_expired(&mut self, client_id: u64) { - let _ig = 
self.trackers.remove(&client_id); - } - - /// Release notifiers - pub(super) fn release_notifiers(&mut self) { - self.er_notifiers.drain().for_each(|(_, event)| { - let _ignore = event.notify(usize::MAX); - }); - self.asr_notifiers.drain().for_each(|(_, event)| { - let _ignore = event.notify(usize::MAX); - }); - } - /// Clear, called when leader retires pub(super) fn clear(&mut self) { - self.er_buffer.clear(); - self.asr_buffer.clear(); self.trackers.clear(); - self.release_notifiers(); - } - - /// Insert er to internal buffer - pub(super) fn insert_er(&mut self, id: ProposeId, er: Result) { - let er_ok = er.is_ok(); - assert!( - self.er_buffer.insert(id, er).is_none(), - "er should not be inserted twice" - ); - - self.notify_er(&id); - - // wait_synced response is also ready when execution fails - if !er_ok { - self.notify_asr(&id); - } - } - - /// Insert asr to internal buffer - pub(super) fn insert_asr(&mut self, id: ProposeId, asr: Result) { - assert!( - self.asr_buffer.insert(id, asr).is_none(), - "asr should not be inserted twice" - ); - - self.notify_asr(&id); - } - - /// Insert conf change result to internal buffer - pub(super) fn insert_conf(&mut self, id: ProposeId) { - assert!( - self.conf_buffer.insert(id), - "conf should not be inserted twice" - ); - - self.notify_conf(&id); - } - - /// Get a listener for execution result - fn er_listener(&mut self, id: ProposeId) -> EventListener { - let event = self.er_notifiers.entry(id).or_default(); - let listener = event.listen(); - if self.er_buffer.contains_key(&id) { - let _ignore = event.notify(usize::MAX); - } - listener } /// Get a listener for shutdown @@ -127,94 +36,14 @@ impl CommandBoard { self.shutdown_notifier.listen() } - /// Get a listener for after sync result - fn asr_listener(&mut self, id: ProposeId) -> EventListener { - let event = self.asr_notifiers.entry(id).or_default(); - let listener = event.listen(); - if self.asr_buffer.contains_key(&id) { - let _ignore = event.notify(usize::MAX); - } - 
listener - } - - /// Get a listener for conf change result - fn conf_listener(&mut self, id: ProposeId) -> EventListener { - let event = self.conf_notifier.entry(id).or_default(); - let listener = event.listen(); - if self.conf_buffer.contains(&id) { - let _ignore = event.notify(usize::MAX); - } - listener - } - - /// Notify execution results - fn notify_er(&mut self, id: &ProposeId) { - if let Some(notifier) = self.er_notifiers.remove(id) { - let _ignore = notifier.notify(usize::MAX); - } - } - - /// Notify `wait_synced` requests - fn notify_asr(&mut self, id: &ProposeId) { - if let Some(notifier) = self.asr_notifiers.remove(id) { - let _ignore = notifier.notify(usize::MAX); - } - } - /// Notify `shutdown` requests pub(super) fn notify_shutdown(&mut self) { let _ignore = self.shutdown_notifier.notify(usize::MAX); } - /// Notify `wait_synced` requests - fn notify_conf(&mut self, id: &ProposeId) { - if let Some(notifier) = self.conf_notifier.remove(id) { - let _ignore = notifier.notify(usize::MAX); - } - } - - /// Wait for an execution result - pub(super) async fn wait_for_er(cb: &CmdBoardRef, id: ProposeId) -> Result { - loop { - if let Some(er) = cb.map_read(|cb_r| cb_r.er_buffer.get(&id).cloned()) { - return er; - } - let listener = cb.write().er_listener(id); - listener.await; - } - } - /// Wait for an execution result - pub(super) async fn wait_for_shutdown_synced(cb: &CmdBoardRef) { + pub(super) async fn wait_for_shutdown_synced(cb: &CmdBoardRef) { let listener = cb.write().shutdown_listener(); listener.await; } - - /// Wait for an after sync result - pub(super) async fn wait_for_er_asr( - cb: &CmdBoardRef, - id: ProposeId, - ) -> (Result, Result) { - loop { - { - let cb_r = cb.read(); - if let (Some(er), Some(asr)) = (cb_r.er_buffer.get(&id), cb_r.asr_buffer.get(&id)) { - return (er.clone(), asr.clone()); - } - } - let listener = cb.write().asr_listener(id); - listener.await; - } - } - - /// Wait for an conf change result - pub(super) async fn 
wait_for_conf(cb: &CmdBoardRef, id: ProposeId) { - loop { - if let Some(_ccr) = cb.map_read(|cb_r| cb_r.conf_buffer.get(&id).copied()) { - return; - } - let listener = cb.write().conf_listener(id); - listener.await; - } - } } diff --git a/crates/curp/src/server/cmd_worker/mod.rs b/crates/curp/src/server/cmd_worker/mod.rs index 90e7c8475..83704d8a7 100644 --- a/crates/curp/src/server/cmd_worker/mod.rs +++ b/crates/curp/src/server/cmd_worker/mod.rs @@ -13,7 +13,7 @@ use crate::{ log_entry::{EntryData, LogEntry}, response::ResponseSender, role_change::RoleChange, - rpc::{PoolEntry, ProposeId, ProposeResponse, SyncedResponse}, + rpc::{PoolEntry, ProposeResponse, SyncedResponse}, snapshot::{Snapshot, SnapshotMeta}, }; @@ -45,7 +45,6 @@ pub(super) fn execute, RC: RoleChange>( ce: &CE, curp: &RawCurp, ) -> Result, ::Error> { - let cb = curp.cmd_board(); let id = curp.id(); let EntryData::Command(ref cmd) = entry.entry_data else { unreachable!("should not speculative execute {:?}", entry.entry_data); @@ -54,8 +53,6 @@ pub(super) fn execute, RC: RoleChange>( ce.execute_ro(cmd).map(|(er, asr)| (er, Some(asr))) } else { let er = ce.execute(cmd); - let mut cb_w = cb.write(); - cb_w.insert_er(entry.propose_id, er.clone()); debug!( "{id} cmd({}) is speculatively executed, exe status: {}", entry.propose_id, @@ -98,11 +95,9 @@ fn after_sync_cmds, RC: RoleChange>( ) }) .collect(); - let propose_ids = cmd_entries.iter().map(|(e, _)| e.propose_id); - let results = ce.after_sync(cmds, Some(highest_index)); - send_results(curp, results.into_iter(), resp_txs, propose_ids); + send_results(results.into_iter(), resp_txs); for (entry, _) in cmd_entries { curp.trigger(&entry.propose_id); @@ -112,33 +107,25 @@ fn after_sync_cmds, RC: RoleChange>( } /// Send cmd results to clients -fn send_results<'a, C, RC, R, S, P>(curp: &RawCurp, results: R, txs: S, propose_ids: P) +fn send_results<'a, C, R, S>(results: R, txs: S) where C: Command, - RC: RoleChange, R: Iterator, C::Error>>, S: Iterator>, - 
P: Iterator, { - let cb = curp.cmd_board(); - let mut cb_w = cb.write(); - - for ((result, tx_opt), id) in results.zip(txs).zip(propose_ids) { + for (result, tx_opt) in results.zip(txs) { match result { Ok(r) => { let (asr, er_opt) = r.into_parts(); let _ignore_er = tx_opt.as_ref().zip(er_opt.as_ref()).map(|(tx, er)| { tx.send_propose(ProposeResponse::new_result::(&Ok(er.clone()), true)); }); - let _ignore = er_opt.map(|er| cb_w.insert_er(id, Ok(er))); let _ignore_asr = tx_opt .as_ref() .map(|tx| tx.send_synced(SyncedResponse::new_result::(&Ok(asr.clone())))); - cb_w.insert_asr(id, Ok(asr)); } Err(e) => { let _ignore = tx_opt.as_ref().map(|tx| tx.send_err::(e.clone())); - cb_w.insert_asr(id, Err(e)); } } } diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index d0d335c98..67b6194e4 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -125,7 +125,7 @@ pub(super) struct CurpNode, RC: RoleChange> { /// `RawCurp` state machine curp: Arc>, /// Cmd watch board for tracking the cmd sync results - cmd_board: CmdBoardRef, + cmd_board: CmdBoardRef, /// Storage storage: Arc>, /// Snapshot allocator diff --git a/crates/curp/src/server/gc.rs b/crates/curp/src/server/gc.rs index 92af3aeb7..23ab10f39 100644 --- a/crates/curp/src/server/gc.rs +++ b/crates/curp/src/server/gc.rs @@ -9,7 +9,7 @@ use super::{conflict::spec_pool_new::SpeculativePoolRef, lease_manager::LeaseMan /// Garbage collects relevant objects when the client lease expires pub(super) async fn gc_client_lease( lease_mamanger: LeaseManagerRef, - cmd_board: CmdBoardRef, + cmd_board: CmdBoardRef, sp: SpeculativePoolRef, interval: Duration, shutdown_listener: Listener, @@ -23,7 +23,7 @@ pub(super) async fn gc_client_lease( } let mut lm_w = lease_mamanger.write(); - let mut board = cmd_board.write(); + let board = cmd_board.read(); let mut sp_l = sp.lock(); let expired_ids = lm_w.gc_expired(); @@ -36,161 +36,7 @@ pub(super) 
async fn gc_client_lease( } } for id in &expired_propose_ids { - let _ignore_er = board.er_buffer.swap_remove(id); - let _ignore_asr = board.asr_buffer.swap_remove(id); sp_l.remove_by_id(id); } } } - -#[cfg(test)] -mod tests { - use std::{sync::Arc, time::Duration}; - - use curp_test_utils::test_cmd::{TestCommand, TestCommandResult}; - use parking_lot::{Mutex, RwLock}; - use test_macros::abort_on_panic; - use utils::task_manager::{tasks::TaskName, TaskManager}; - - use crate::{ - rpc::{PoolEntry, ProposeId}, - server::{ - cmd_board::{CmdBoardRef, CommandBoard}, - conflict::{spec_pool_new::SpeculativePool, test_pools::TestSpecPool}, - gc::gc_client_lease, - lease_manager::LeaseManager, - }, - }; - - #[tokio::test] - #[abort_on_panic] - async fn cmd_board_gc_test() { - let task_manager = TaskManager::new(); - let board: CmdBoardRef = Arc::new(RwLock::new(CommandBoard::new())); - let lease_manager = Arc::new(RwLock::new(LeaseManager::new())); - let lease_manager_c = Arc::clone(&lease_manager); - let sp = Arc::new(Mutex::new(SpeculativePool::new(vec![]))); - let sp_c = Arc::clone(&sp); - task_manager.spawn(TaskName::GcClientLease, |n| { - gc_client_lease( - lease_manager_c, - Arc::clone(&board), - sp_c, - Duration::from_millis(500), - n, - ) - }); - - tokio::time::sleep(Duration::from_millis(100)).await; - let id1 = lease_manager - .write() - .grant(Some(Duration::from_millis(900))); - let id2 = lease_manager - .write() - .grant(Some(Duration::from_millis(900))); - let _ignore = board.write().tracker(id1).only_record(1); - let _ignore = board.write().tracker(id2).only_record(2); - sp.lock().insert(PoolEntry::new( - ProposeId(id1, 1), - Arc::new(TestCommand::default()), - )); - sp.lock().insert(PoolEntry::new( - ProposeId(id2, 2), - Arc::new(TestCommand::default()), - )); - board - .write() - .er_buffer - .insert(ProposeId(id1, 1), Ok(TestCommandResult::default())); - tokio::time::sleep(Duration::from_millis(100)).await; - board - .write() - .er_buffer - 
.insert(ProposeId(id2, 2), Ok(TestCommandResult::default())); - board - .write() - .asr_buffer - .insert(ProposeId(id1, 1), Ok(0.into())); - tokio::time::sleep(Duration::from_millis(100)).await; - board - .write() - .asr_buffer - .insert(ProposeId(id2, 2), Ok(0.into())); - - // at 600ms - tokio::time::sleep(Duration::from_millis(400)).await; - let id3 = lease_manager - .write() - .grant(Some(Duration::from_millis(500))); - board - .write() - .er_buffer - .insert(ProposeId(id3, 3), Ok(TestCommandResult::default())); - board - .write() - .asr_buffer - .insert(ProposeId(id3, 3), Ok(0.into())); - - // at 1100ms, the first two kv should be removed - tokio::time::sleep(Duration::from_millis(500)).await; - let board = board.write(); - assert_eq!(board.er_buffer.len(), 1); - assert_eq!(*board.er_buffer.get_index(0).unwrap().0, ProposeId(id3, 3)); - assert_eq!(board.asr_buffer.len(), 1); - assert_eq!(*board.asr_buffer.get_index(0).unwrap().0, ProposeId(id3, 3)); - task_manager.shutdown(true).await; - } - - #[tokio::test] - #[abort_on_panic] - async fn spec_gc_test() { - let task_manager = TaskManager::new(); - let board: CmdBoardRef = Arc::new(RwLock::new(CommandBoard::new())); - let lease_manager = Arc::new(RwLock::new(LeaseManager::new())); - let lease_manager_c = Arc::clone(&lease_manager); - let sp = Arc::new(Mutex::new(SpeculativePool::new(vec![Box::new( - TestSpecPool::default(), - )]))); - let sp_cloned = Arc::clone(&sp); - task_manager.spawn(TaskName::GcClientLease, |n| { - gc_client_lease( - lease_manager_c, - Arc::clone(&board), - sp_cloned, - Duration::from_millis(500), - n, - ) - }); - - tokio::time::sleep(Duration::from_millis(100)).await; - - let id1 = lease_manager - .write() - .grant(Some(Duration::from_millis(900))); - let id2 = lease_manager - .write() - .grant(Some(Duration::from_millis(2000))); - let _ignore = board.write().tracker(id1).only_record(1); - let cmd1 = Arc::new(TestCommand::new_put(vec![1], 1)); - 
sp.lock().insert(PoolEntry::new(ProposeId(id1, 1), cmd1)); - - tokio::time::sleep(Duration::from_millis(100)).await; - let _ignore = board.write().tracker(id1).only_record(2); - let cmd2 = Arc::new(TestCommand::new_put(vec![2], 1)); - sp.lock().insert(PoolEntry::new(ProposeId(id1, 2), cmd2)); - - // at 600ms - tokio::time::sleep(Duration::from_millis(400)).await; - let _ignore = board.write().tracker(id2).only_record(1); - let cmd3 = Arc::new(TestCommand::new_put(vec![3], 1)); - sp.lock() - .insert(PoolEntry::new(ProposeId(id2, 1), Arc::clone(&cmd3))); - - // at 1100ms, the first two kv should be removed - tokio::time::sleep(Duration::from_millis(500)).await; - let spec = sp.lock(); - assert_eq!(spec.len(), 1); - assert_eq!(spec.all(), vec![PoolEntry::new(ProposeId(id2, 1), cmd3)]); - task_manager.shutdown(true).await; - } -} diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 5527f8f19..07949dff9 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -136,7 +136,7 @@ pub(super) struct RawCurpArgs { /// Current node is leader or not is_leader: bool, /// Cmd board for tracking the cmd sync results - cmd_board: CmdBoardRef, + cmd_board: CmdBoardRef, /// Lease Manager lease_manager: LeaseManagerRef, /// Config @@ -389,7 +389,7 @@ struct Context { /// Client tls config client_tls_config: Option, /// Cmd board for tracking the cmd sync results - cb: CmdBoardRef, + cb: CmdBoardRef, /// The lease manager lm: LeaseManagerRef, /// Election tick @@ -1287,7 +1287,7 @@ impl RawCurp { } /// Get a reference to command board - pub(super) fn cmd_board(&self) -> CmdBoardRef { + pub(super) fn cmd_board(&self) -> CmdBoardRef { Arc::clone(&self.ctx.cb) } From ff3c8b875fbce6e676688118d137cdee7b74ac26 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 4 Nov 2024 16:24:12 +0800 Subject: [PATCH 301/322] refactor: remove curp server gc task Signed-off-by: bsbds 
<69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/curp_node/mod.rs | 11 ------- crates/curp/src/server/gc.rs | 42 ------------------------- crates/curp/src/server/mod.rs | 3 -- crates/utils/src/task_manager/tasks.rs | 2 -- 4 files changed, 58 deletions(-) delete mode 100644 crates/curp/src/server/gc.rs diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 67b6194e4..aa3d303fc 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -34,7 +34,6 @@ use super::{ cmd_worker::execute, conflict::spec_pool_new::{SpObject, SpeculativePool}, conflict::uncommitted_pool::{UcpObject, UncommittedPool}, - gc::gc_client_lease, lease_manager::LeaseManager, raw_curp::{RawCurp, Vote}, storage::StorageApi, @@ -769,16 +768,6 @@ impl, RC: RoleChange> CurpNode { metrics::Metrics::register_callback(Arc::clone(&curp))?; - task_manager.spawn(TaskName::GcClientLease, |n| { - gc_client_lease( - lease_manager, - Arc::clone(&cmd_board), - sp, - curp_cfg.gc_interval, - n, - ) - }); - Self::run_bg_tasks( Arc::clone(&curp), Arc::clone(&cmd_executor), diff --git a/crates/curp/src/server/gc.rs b/crates/curp/src/server/gc.rs deleted file mode 100644 index 23ab10f39..000000000 --- a/crates/curp/src/server/gc.rs +++ /dev/null @@ -1,42 +0,0 @@ -use std::time::Duration; - -use utils::task_manager::Listener; - -use crate::{cmd::Command, rpc::ProposeId, server::cmd_board::CmdBoardRef}; - -use super::{conflict::spec_pool_new::SpeculativePoolRef, lease_manager::LeaseManagerRef}; - -/// Garbage collects relevant objects when the client lease expires -pub(super) async fn gc_client_lease( - lease_mamanger: LeaseManagerRef, - cmd_board: CmdBoardRef, - sp: SpeculativePoolRef, - interval: Duration, - shutdown_listener: Listener, -) { - #[allow(clippy::arithmetic_side_effects, clippy::ignored_unit_patterns)] - // introduced by tokio select - loop { - tokio::select! 
{ - _ = tokio::time::sleep(interval) => {} - _ = shutdown_listener.wait() => break, - } - - let mut lm_w = lease_mamanger.write(); - let board = cmd_board.read(); - let mut sp_l = sp.lock(); - let expired_ids = lm_w.gc_expired(); - - let mut expired_propose_ids = Vec::new(); - for id in expired_ids { - if let Some(tracker) = board.trackers.get(&id) { - let incompleted_nums = tracker.all_incompleted(); - expired_propose_ids - .extend(incompleted_nums.into_iter().map(|num| ProposeId(id, num))); - } - } - for id in &expired_propose_ids { - sp_l.remove_by_id(id); - } - } -} diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 7dcabad7b..a2e267c86 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -64,9 +64,6 @@ mod cmd_board; /// Conflict pools pub mod conflict; -/// Background garbage collection for Curp server -mod gc; - /// Curp Node mod curp_node; diff --git a/crates/utils/src/task_manager/tasks.rs b/crates/utils/src/task_manager/tasks.rs index 3ed500c2b..5837e72ab 100644 --- a/crates/utils/src/task_manager/tasks.rs +++ b/crates/utils/src/task_manager/tasks.rs @@ -43,7 +43,6 @@ enum_with_iter! 
{ TonicServer, Election, ConfChange, - GcClientLease, RevokeExpiredLeases, SyncVictims, AutoCompactor, @@ -62,7 +61,6 @@ impl TaskName { | TaskName::LeaseKeepAlive | TaskName::TonicServer | TaskName::ConfChange - | TaskName::GcClientLease | TaskName::RevokeExpiredLeases | TaskName::SyncVictims | TaskName::AutoCompactor => false, From e7b71accda52a9625147f093973a25f6fd9788da Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 4 Nov 2024 16:32:08 +0800 Subject: [PATCH 302/322] refactor: remove curp server client lease Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/cmd_board.rs | 12 +---- .../curp/src/server/conflict/spec_pool_new.rs | 7 +-- crates/curp/src/server/curp_node/mod.rs | 53 ++----------------- crates/curp/src/server/metrics.rs | 9 ---- crates/curp/src/server/mod.rs | 26 +++------ crates/curp/src/server/raw_curp/mod.rs | 44 --------------- crates/curp/src/server/raw_curp/tests.rs | 17 ------ 7 files changed, 16 insertions(+), 152 deletions(-) diff --git a/crates/curp/src/server/cmd_board.rs b/crates/curp/src/server/cmd_board.rs index 899e002d3..f20a9b51a 100644 --- a/crates/curp/src/server/cmd_board.rs +++ b/crates/curp/src/server/cmd_board.rs @@ -1,10 +1,8 @@ -use std::{collections::HashMap, sync::Arc}; +use std::sync::Arc; use event_listener::{Event, EventListener}; use parking_lot::RwLock; -use crate::tracker::Tracker; - /// Ref to the cmd board pub(super) type CmdBoardRef = Arc>; @@ -13,8 +11,6 @@ pub(super) type CmdBoardRef = Arc>; pub(super) struct CommandBoard { /// Store the shutdown notifier shutdown_notifier: Event, - /// The result trackers track all cmd, this is used for dedup - pub(super) trackers: HashMap, } impl CommandBoard { @@ -22,15 +18,9 @@ impl CommandBoard { pub(super) fn new() -> Self { Self { shutdown_notifier: Event::new(), - trackers: HashMap::new(), } } - /// Clear, called when leader retires - pub(super) fn clear(&mut self) { - self.trackers.clear(); 
- } - /// Get a listener for shutdown fn shutdown_listener(&mut self) -> EventListener { self.shutdown_notifier.listen() diff --git a/crates/curp/src/server/conflict/spec_pool_new.rs b/crates/curp/src/server/conflict/spec_pool_new.rs index 97cded6f3..ad9a89c75 100644 --- a/crates/curp/src/server/conflict/spec_pool_new.rs +++ b/crates/curp/src/server/conflict/spec_pool_new.rs @@ -1,13 +1,9 @@ -use std::{collections::HashMap, sync::Arc}; +use std::collections::HashMap; use curp_external_api::conflict::SpeculativePoolOp; -use parking_lot::Mutex; use crate::rpc::{PoolEntry, ProposeId}; -/// Ref to `SpeculativePool` -pub(crate) type SpeculativePoolRef = Arc>>; - /// A speculative pool object pub type SpObject = Box> + Send + 'static>; @@ -51,6 +47,7 @@ impl SpeculativePool { let _ignore = self.entries.remove(&entry.id); } + #[allow(unused)] /// Removes an entry from the pool by it's propose id pub(crate) fn remove_by_id(&mut self, id: &ProposeId) { if let Some(entry) = self.entries.remove(id) { diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index aa3d303fc..fda111773 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -34,7 +34,6 @@ use super::{ cmd_worker::execute, conflict::spec_pool_new::{SpObject, SpeculativePool}, conflict::uncommitted_pool::{UcpObject, UncommittedPool}, - lease_manager::LeaseManager, raw_curp::{RawCurp, Vote}, storage::StorageApi, }; @@ -46,11 +45,11 @@ use crate::{ role_change::RoleChange, rpc::{ self, AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchMembershipRequest, - InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, MembershipResponse, - MoveLeaderRequest, MoveLeaderResponse, PoolEntry, ProposeId, ProposeRequest, - ProposeResponse, ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, - ShutdownResponse, SyncedResponse, TriggerShutdownRequest, TriggerShutdownResponse, - TryBecomeLeaderNowRequest, 
TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, + InstallSnapshotRequest, InstallSnapshotResponse, MembershipResponse, MoveLeaderRequest, + MoveLeaderResponse, PoolEntry, ProposeId, ProposeRequest, ProposeResponse, + ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, + SyncedResponse, TriggerShutdownRequest, TriggerShutdownResponse, TryBecomeLeaderNowRequest, + TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, }, server::{ cmd_worker::{after_sync, worker_reset, worker_snapshot}, @@ -145,7 +144,6 @@ impl, RC: RoleChange> CurpNode { &self, req: &ProposeRequest, resp_tx: Arc, - bypassed: bool, ) -> Result<(), CurpError> { if self.curp.is_cluster_shutdown() { return Err(CurpError::shutting_down()); @@ -160,10 +158,6 @@ impl, RC: RoleChange> CurpNode { info!("not using slow path for: {req:?}"); } - if bypassed { - self.curp.mark_client_id_bypassed(req.propose_id().0); - } - let propose = Propose::try_new(req, resp_tx)?; let _ignore = self.propose_tx.send(propose); @@ -298,11 +292,7 @@ impl, RC: RoleChange> CurpNode { pub(super) async fn shutdown( &self, req: ShutdownRequest, - bypassed: bool, ) -> Result { - if bypassed { - self.curp.mark_client_id_bypassed(req.propose_id().0); - } self.curp.handle_shutdown(req.propose_id())?; CommandBoard::wait_for_shutdown_synced(&self.cmd_board).await; self.trigger_nodes_shutdown().await; @@ -342,37 +332,6 @@ impl, RC: RoleChange> CurpNode { } } - /// Handle lease keep alive requests - pub(super) async fn lease_keep_alive( - &self, - req_stream: impl Stream>, - ) -> Result { - pin_mut!(req_stream); - while let Some(req) = req_stream.next().await { - // NOTE: The leader may shutdown itself in configuration change. - // We must first check this situation. 
- self.curp.check_leader_transfer()?; - if self.curp.is_cluster_shutdown() { - return Err(CurpError::shutting_down()); - } - if self.curp.is_node_shutdown() { - return Err(CurpError::node_not_exist()); - } - if !self.curp.is_leader() { - let (leader_id, term, _) = self.curp.leader(); - return Err(CurpError::redirect(leader_id, term)); - } - let req = req.map_err(|err| { - error!("{err}"); - CurpError::RpcTransport(()) - })?; - if let Some(client_id) = self.curp.handle_lease_keep_alive(req.client_id) { - return Ok(LeaseKeepAliveMsg { client_id }); - } - } - Err(CurpError::RpcTransport(())) - } - /// Handles fetch membership requests pub(super) fn fetch_membership( &self, @@ -732,7 +691,6 @@ impl, RC: RoleChange> CurpNode { .collect(); let member_connects = rpc::inner_connects(peer_addrs, client_tls_config.as_ref()).collect(); let cmd_board = Arc::new(RwLock::new(CommandBoard::new())); - let lease_manager = Arc::new(RwLock::new(LeaseManager::new())); let last_applied = cmd_executor .last_applied() .map_err(|e| CurpError::internal(format!("get applied index error, {e}")))?; @@ -746,7 +704,6 @@ impl, RC: RoleChange> CurpNode { RawCurp::builder() .is_leader(is_leader) .cmd_board(Arc::clone(&cmd_board)) - .lease_manager(Arc::clone(&lease_manager)) .cfg(Arc::clone(&curp_cfg)) .role_change(role_change) .task_manager(Arc::clone(&task_manager)) diff --git a/crates/curp/src/server/metrics.rs b/crates/curp/src/server/metrics.rs index 16836636c..57b7fe689 100644 --- a/crates/curp/src/server/metrics.rs +++ b/crates/curp/src/server/metrics.rs @@ -55,7 +55,6 @@ impl Metrics { is_learner, server_id, sp_cnt, - online_clients, proposals_committed, proposals_applied, proposals_pending, @@ -80,10 +79,6 @@ impl Metrics { .u64_observable_gauge("sp_cnt") .with_description("The speculative pool size of this server") .init(), - meter - .u64_observable_gauge("online_clients") - .with_description("The online client ids count of this server if it is the leader") - .init(), meter 
.u64_observable_gauge("proposals_committed") .with_description("The total number of consensus proposals committed.") @@ -105,7 +100,6 @@ impl Metrics { is_learner.as_any(), server_id.as_any(), sp_cnt.as_any(), - online_clients.as_any(), ], move |observer| { let (leader_id, _, leader) = curp.leader(); @@ -120,9 +114,6 @@ impl Metrics { let sp_size = curp.spec_pool().lock().len(); observer.observe_u64(&sp_cnt, sp_size.numeric_cast(), &[]); - let client_count = curp.lease_manager().read().online_clients(); - observer.observe_u64(&online_clients, client_count.numeric_cast(), &[]); - let commit_index = curp.commit_index(); let last_log_index = curp.last_log_index(); diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index a2e267c86..13d230d0b 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use engine::SnapshotAllocator; use flume::r#async::RecvStream; -use futures::{Stream, StreamExt}; +use futures::Stream; #[cfg(not(madsim))] use tonic::transport::ClientTlsConfig; use tracing::instrument; @@ -22,7 +22,6 @@ use crate::cmd::CommandExecutor; use crate::member::MembershipInfo; use crate::response::ResponseSender; use crate::role_change::RoleChange; -use crate::rpc::connect::Bypass; use crate::rpc::AppendEntriesRequest; use crate::rpc::AppendEntriesResponse; use crate::rpc::ChangeMembershipRequest; @@ -70,6 +69,7 @@ mod curp_node; /// Storage mod storage; +#[cfg(ignore)] /// Lease Manager mod lease_manager; @@ -108,11 +108,9 @@ impl, RC: RoleChange> crate::rpc::Protocol fo &self, request: tonic::Request, ) -> Result, tonic::Status> { - let bypassed = request.metadata().is_bypassed(); let (tx, rx) = flume::bounded(2); let resp_tx = Arc::new(ResponseSender::new(tx)); - self.inner - .propose_stream(&request.into_inner(), resp_tx, bypassed)?; + self.inner.propose_stream(&request.into_inner(), resp_tx)?; Ok(tonic::Response::new(rx.into_stream())) } @@ -140,10 +138,9 @@ impl, RC: 
RoleChange> crate::rpc::Protocol fo &self, request: tonic::Request, ) -> Result, tonic::Status> { - let bypassed = request.metadata().is_bypassed(); request.metadata().extract_span(); Ok(tonic::Response::new( - self.inner.shutdown(request.into_inner(), bypassed).await?, + self.inner.shutdown(request.into_inner()).await?, )) } @@ -169,12 +166,9 @@ impl, RC: RoleChange> crate::rpc::Protocol fo #[allow(clippy::unimplemented)] async fn lease_keep_alive( &self, - request: tonic::Request>, + _request: tonic::Request>, ) -> Result, tonic::Status> { - let req_stream = request.into_inner(); - Ok(tonic::Response::new( - self.inner.lease_keep_alive(req_stream).await?, - )) + Err(tonic::Status::unimplemented("unimplemented")) } #[instrument(skip_all, name = "curp_fetch_membership")] @@ -287,13 +281,9 @@ impl, RC: RoleChange> StreamingProtocol for R #[instrument(skip_all, name = "lease_keep_alive")] async fn lease_keep_alive( &self, - request: impl Stream + Send, + _request: impl Stream + Send, ) -> Result, tonic::Status> { - let stream = request.map(Ok::<_, std::io::Error>); - - Ok(tonic::Response::new( - self.inner.lease_keep_alive(stream).await?, - )) + Err(tonic::Status::unimplemented("unimplemented")) } } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 07949dff9..66951f511 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -56,7 +56,6 @@ use self::state::State; use super::conflict::spec_pool_new::SpeculativePool; use super::conflict::uncommitted_pool::UncommittedPool; use super::curp_node::TaskType; -use super::lease_manager::LeaseManagerRef; use super::storage::StorageApi; use super::DB; use crate::cmd::Command; @@ -137,8 +136,6 @@ pub(super) struct RawCurpArgs { is_leader: bool, /// Cmd board for tracking the cmd sync results cmd_board: CmdBoardRef, - /// Lease Manager - lease_manager: LeaseManagerRef, /// Config cfg: Arc, /// Role change callback @@ -186,7 +183,6 @@ impl 
RawCurpBuilder { let ctx = Context::builder() .cb(args.cmd_board) - .lm(args.lease_manager) .cfg(args.cfg) .role_change(args.role_change) .curp_storage(args.curp_storage) @@ -390,8 +386,6 @@ struct Context { client_tls_config: Option, /// Cmd board for tracking the cmd sync results cb: CmdBoardRef, - /// The lease manager - lm: LeaseManagerRef, /// Election tick #[builder(setter(skip))] election_tick: AtomicU8, @@ -438,10 +432,6 @@ impl ContextBuilder { Some(value) => value, None => return Err(ContextBuilderError::UninitializedField("cb")), }, - lm: match self.lm.take() { - Some(value) => value, - None => return Err(ContextBuilderError::UninitializedField("lm")), - }, election_tick: AtomicU8::new(0), leader_event: Arc::new(Event::new()), role_change: match self.role_change.take() { @@ -728,22 +718,6 @@ impl RawCurp { Ok(()) } - /// Handle `lease_keep_alive` message - pub(super) fn handle_lease_keep_alive(&self, client_id: u64) -> Option { - let mut lm_w = self.ctx.lm.write(); - if client_id == 0 { - return Some(lm_w.grant(None)); - } - if lm_w.check_alive(client_id) { - lm_w.renew(client_id, None); - None - } else { - metrics::get().client_id_revokes.add(1, &[]); - lm_w.revoke(client_id); - Some(lm_w.grant(None)) - } - } - /// Handle `append_entries` /// Return `Ok(term, entries)` if succeeds /// Return `Err(term, hint_index)` if fails @@ -1291,11 +1265,6 @@ impl RawCurp { Arc::clone(&self.ctx.cb) } - /// Get the lease manager - pub(super) fn lease_manager(&self) -> LeaseManagerRef { - Arc::clone(&self.ctx.lm) - } - /// Get a reference to spec pool pub(super) fn spec_pool(&self) -> &Mutex> { &self.ctx.spec_pool @@ -1314,11 +1283,6 @@ impl RawCurp { .unwrap_or_else(|| unreachable!("server id {id} not found")) } - /// Check if the current node is shutting down - pub(super) fn is_node_shutdown(&self) -> bool { - self.task_manager.is_node_shutdown() - } - /// Check if the current node is shutting down pub(super) fn is_cluster_shutdown(&self) -> bool { 
self.task_manager.is_cluster_shutdown() @@ -1363,12 +1327,6 @@ impl RawCurp { self.log.read().last_as } - /// Mark a client id as bypassed - pub(super) fn mark_client_id_bypassed(&self, client_id: u64) { - let mut lm_w = self.ctx.lm.write(); - lm_w.bypass(client_id); - } - /// Get client tls config pub(super) fn client_tls_config(&self) -> Option<&ClientTlsConfig> { self.ctx.client_tls_config.as_ref() @@ -1687,8 +1645,6 @@ impl RawCurp { /// When leader retires, it should reset state fn leader_retires(&self) { debug!("leader {} retires", self.id()); - self.ctx.cb.write().clear(); - self.ctx.lm.write().clear(); self.ctx.uncommitted_pool.lock().clear(); self.lst.reset_no_op_state(); } diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 34ee29a45..04d72dff3 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -14,9 +14,7 @@ use crate::{ server::{ cmd_board::CommandBoard, conflict::test_pools::{TestSpecPool, TestUncomPool}, - lease_manager::LeaseManager, }, - tracker::Tracker, LogIndex, }; @@ -41,7 +39,6 @@ impl RawCurp { ) -> Self { let _peer_ids: Vec<_> = (1..n).collect(); let cmd_board = Arc::new(RwLock::new(CommandBoard::new())); - let lease_manager = Arc::new(RwLock::new(LeaseManager::new())); let curp_config = CurpConfigBuilder::default() .log_entries_cap(10) .build() @@ -49,9 +46,6 @@ impl RawCurp { let curp_storage = Arc::new(DB::open(&curp_config.engine_cfg).unwrap()); let _ignore = curp_storage.recover().unwrap(); - // bypass test client id - lease_manager.write().bypass(TEST_CLIENT_ID); - let sp = Arc::new(Mutex::new(SpeculativePool::new(vec![Box::new( TestSpecPool::default(), )]))); @@ -78,7 +72,6 @@ impl RawCurp { Self::builder() .is_leader(true) .cmd_board(cmd_board) - .lease_manager(lease_manager) .cfg(Arc::new(curp_config)) .role_change(role_change) .task_manager(task_manager) @@ -94,16 +87,6 @@ impl RawCurp { .unwrap() } - pub(crate) fn tracker(&self, 
client_id: u64) -> Tracker { - self.ctx - .cb - .read() - .trackers - .get(&client_id) - .cloned() - .unwrap_or_else(|| unreachable!("cannot find {client_id} in result trackers")) - } - /// Add a new cmd to the log, will return log entry index pub(crate) fn push_cmd(&self, propose_id: ProposeId, cmd: Arc) -> LogIndex { let st_r = self.st.read(); From faf81e59bea1d6c1f88421ead2e753fb7bf4d8a9 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 4 Nov 2024 17:00:22 +0800 Subject: [PATCH 303/322] refactor: refine cmd worker implementation Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/cmd_worker/mod.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/curp/src/server/cmd_worker/mod.rs b/crates/curp/src/server/cmd_worker/mod.rs index 83704d8a7..b7b89d993 100644 --- a/crates/curp/src/server/cmd_worker/mod.rs +++ b/crates/curp/src/server/cmd_worker/mod.rs @@ -45,21 +45,21 @@ pub(super) fn execute, RC: RoleChange>( ce: &CE, curp: &RawCurp, ) -> Result, ::Error> { - let id = curp.id(); let EntryData::Command(ref cmd) = entry.entry_data else { unreachable!("should not speculative execute {:?}", entry.entry_data); }; - if cmd.is_read_only() { + let result = if cmd.is_read_only() { ce.execute_ro(cmd).map(|(er, asr)| (er, Some(asr))) } else { - let er = ce.execute(cmd); - debug!( - "{id} cmd({}) is speculatively executed, exe status: {}", - entry.propose_id, - er.is_ok(), - ); - er.map(|e| (e, None)) - } + ce.execute(cmd).map(|er| (er, None)) + }; + debug!( + "{} cmd({}) is speculatively executed, exe status: {}", + curp.id(), + entry.propose_id, + result.is_ok(), + ); + result } /// After sync cmd entries From 098c15e97fdf4cec6c178adff575c0351f5b4120 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 4 Nov 2024 17:23:05 +0800 Subject: [PATCH 304/322] refactor: remove unused rpc implementations Signed-off-by: bsbds 
<69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/common | 2 +- crates/curp/src/client/retry.rs | 13 ++------- crates/curp/src/client/tests.rs | 12 ++++---- crates/curp/src/rpc/connect/lazy.rs | 23 +++------------ crates/curp/src/rpc/connect/mod.rs | 39 +++---------------------- crates/curp/src/rpc/mod.rs | 4 --- crates/curp/src/rpc/reconnect.rs | 14 ++------- crates/curp/src/server/mod.rs | 10 ------- crates/xline/src/server/auth_wrapper.rs | 16 +++------- 9 files changed, 24 insertions(+), 109 deletions(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index a887141b1..5f29de820 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit a887141b18371357ad811b4a9bcae584e26e2f01 +Subproject commit 5f29de8204724bf760b9efe9f87d9c095601f8b1 diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index dd0306c03..7f317ed08 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -23,7 +23,7 @@ use super::{ use crate::{ members::ServerId, rpc::{ - Change, CurpError, MembershipResponse, Node, NodeMetadata, ProposeId, ReadState, Redirect, + Change, CurpError, MembershipResponse, Node, NodeMetadata, ProposeId, Redirect, WaitLearnerResponse, }, tracker::Tracker, @@ -118,22 +118,15 @@ impl Backoff { pub(crate) struct Context { /// The propose id propose_id: ProposeId, - /// First incomplete sequence - first_incomplete: u64, /// The current cluster state cluster_state: ClusterStateFull, } impl Context { /// Creates a new `Context` - pub(crate) fn new( - propose_id: ProposeId, - first_incomplete: u64, - cluster_state: ClusterStateFull, - ) -> Self { + pub(crate) fn new(propose_id: ProposeId, cluster_state: ClusterStateFull) -> Self { Self { propose_id, - first_incomplete, cluster_state, } } @@ -241,7 +234,7 @@ impl Retry { let propose_id = ProposeId(self.client_id, rand::random()); let cluster_state = self.cluster_state.ready_or_fetch().await?; // TODO: 
gen propose id - Ok(Context::new(propose_id, 0, cluster_state)) + Ok(Context::new(propose_id, cluster_state)) } /// Updates the cluster state when error occurs. diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index bcab76645..108e17e93 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -148,7 +148,7 @@ async fn test_unary_propose_fast_path_works() { }); let unary = init_unary_client(None, None); let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); - let ctx = Context::new(ProposeId::default(), 0, cluster_state); + let ctx = Context::new(ProposeId::default(), cluster_state); let res = unary .propose(&TestCommand::new_put(vec![1], 1), None, true, ctx) .await @@ -184,7 +184,7 @@ async fn test_unary_propose_slow_path_works() { let unary = init_unary_client(None, None); let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); - let ctx = Context::new(ProposeId::default(), 0, cluster_state); + let ctx = Context::new(ProposeId::default(), cluster_state); let start_at = Instant::now(); let res = unary .propose(&TestCommand::new_put(vec![1], 1), None, false, ctx) @@ -230,7 +230,7 @@ async fn test_unary_propose_fast_path_fallback_slow_path() { let unary = init_unary_client(None, None); let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); - let ctx = Context::new(ProposeId::default(), 0, cluster_state); + let ctx = Context::new(ProposeId::default(), cluster_state); let start_at = Instant::now(); let res = unary .propose(&TestCommand::new_put(vec![1], 1), None, true, ctx) @@ -278,7 +278,7 @@ async fn test_unary_propose_return_early_err() { let unary = init_unary_client(None, None); let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); - let ctx = Context::new(ProposeId::default(), 0, cluster_state); + let ctx = Context::new(ProposeId::default(), cluster_state); let err = unary 
.propose(&TestCommand::new_put(vec![1], 1), None, true, ctx) .await @@ -466,7 +466,7 @@ async fn test_read_index_success() { let unary = init_unary_client(None, None); let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); - let ctx = Context::new(ProposeId::default(), 0, cluster_state); + let ctx = Context::new(ProposeId::default(), cluster_state); let res = unary .propose(&TestCommand::default(), None, true, ctx) .await @@ -501,7 +501,7 @@ async fn test_read_index_fail() { }); let unary = init_unary_client(None, None); let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); - let ctx = Context::new(ProposeId::default(), 0, cluster_state); + let ctx = Context::new(ProposeId::default(), cluster_state); let res = unary .propose(&TestCommand::default(), None, true, ctx) .await; diff --git a/crates/curp/src/rpc/connect/lazy.rs b/crates/curp/src/rpc/connect/lazy.rs index 5823ca9d3..e186cd392 100644 --- a/crates/curp/src/rpc/connect/lazy.rs +++ b/crates/curp/src/rpc/connect/lazy.rs @@ -15,11 +15,10 @@ use crate::{ inner_messagepb::inner_protocol_client::InnerProtocolClient, }, AppendEntriesRequest, AppendEntriesResponse, ChangeMembershipRequest, CurpError, - FetchMembershipRequest, FetchReadStateRequest, FetchReadStateResponse, - InstallSnapshotResponse, MembershipResponse, MoveLeaderRequest, MoveLeaderResponse, - OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, RecordResponse, - ShutdownRequest, ShutdownResponse, VoteRequest, VoteResponse, WaitLearnerRequest, - WaitLearnerResponse, + FetchMembershipRequest, InstallSnapshotResponse, MembershipResponse, MoveLeaderRequest, + MoveLeaderResponse, OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, + RecordResponse, ShutdownRequest, ShutdownResponse, VoteRequest, VoteResponse, + WaitLearnerRequest, WaitLearnerResponse, }, snapshot::Snapshot, }; @@ -194,20 +193,6 @@ impl ConnectApi for ConnectLazy> { inner.as_ref().unwrap().shutdown(request, 
timeout).await } - async fn fetch_read_state( - &self, - request: FetchReadStateRequest, - timeout: Duration, - ) -> Result, CurpError> { - let mut inner = self.inner.lock().await; - self.connect_inner(&mut inner); - inner - .as_ref() - .unwrap() - .fetch_read_state(request, timeout) - .await - } - async fn move_leader( &self, request: MoveLeaderRequest, diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index a0463facc..37e15a442 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -40,11 +40,10 @@ use crate::{ commandpb::protocol_client::ProtocolClient, inner_messagepb::inner_protocol_client::InnerProtocolClient, }, - AppendEntriesRequest, AppendEntriesResponse, CurpError, FetchReadStateRequest, - FetchReadStateResponse, InstallSnapshotRequest, InstallSnapshotResponse, LeaseKeepAliveMsg, - MoveLeaderRequest, MoveLeaderResponse, ProposeRequest, Protocol, ShutdownRequest, - ShutdownResponse, TriggerShutdownRequest, TryBecomeLeaderNowRequest, VoteRequest, - VoteResponse, + AppendEntriesRequest, AppendEntriesResponse, CurpError, InstallSnapshotRequest, + InstallSnapshotResponse, LeaseKeepAliveMsg, MoveLeaderRequest, MoveLeaderResponse, + ProposeRequest, Protocol, ShutdownRequest, ShutdownResponse, TriggerShutdownRequest, + TryBecomeLeaderNowRequest, VoteRequest, VoteResponse, }, server::StreamingProtocol, snapshot::Snapshot, @@ -202,13 +201,6 @@ pub(crate) trait ConnectApi: Send + Sync + 'static { timeout: Duration, ) -> Result, CurpError>; - /// Send `FetchReadStateRequest` - async fn fetch_read_state( - &self, - request: FetchReadStateRequest, - timeout: Duration, - ) -> Result, CurpError>; - /// Send `MoveLeaderRequest` async fn move_leader( &self, @@ -467,17 +459,6 @@ impl ConnectApi for Connect> { with_timeout!(timeout, client.shutdown(req)).map_err(Into::into) } - /// Send `FetchReadStateRequest` - async fn fetch_read_state( - &self, - request: FetchReadStateRequest, - timeout: Duration, - 
) -> Result, CurpError> { - let mut client = self.rpc_connect.clone(); - let req = tonic::Request::new(request); - with_timeout!(timeout, client.fetch_read_state(req)).map_err(Into::into) - } - /// Send `MoveLeaderRequest` async fn move_leader( &self, @@ -756,18 +737,6 @@ where self.server.shutdown(req).await.map_err(Into::into) } - /// Send `FetchReadStateRequest` - async fn fetch_read_state( - &self, - request: FetchReadStateRequest, - _timeout: Duration, - ) -> Result, CurpError> { - let mut req = tonic::Request::new(request); - req.metadata_mut().inject_bypassed(); - req.metadata_mut().inject_current(); - self.server.fetch_read_state(req).await.map_err(Into::into) - } - /// Send `MoveLeaderRequest` async fn move_leader( &self, diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index d97bca0d0..76ac2b021 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -22,15 +22,12 @@ pub use self::proto::{ cmd_result::Result as CmdResultInner, curp_error::Err as CurpError, // easy for match curp_error::Redirect, - fetch_read_state_response::{IdSet, ReadState}, op_response::Op as ResponseOp, protocol_client, protocol_server::{Protocol, ProtocolServer}, ChangeMembershipRequest, CmdResult, FetchMembershipRequest, - FetchReadStateRequest, - FetchReadStateResponse, LeaseKeepAliveMsg, Member, MembershipResponse, @@ -150,7 +147,6 @@ impl ProposeRequest { cluster_version, term, slow_path, - first_incomplete: 0, } } diff --git a/crates/curp/src/rpc/reconnect.rs b/crates/curp/src/rpc/reconnect.rs index 57dfb1634..609039a8c 100644 --- a/crates/curp/src/rpc/reconnect.rs +++ b/crates/curp/src/rpc/reconnect.rs @@ -8,9 +8,8 @@ use crate::{ members::ServerId, rpc::{ connect::ConnectApi, ChangeMembershipRequest, CurpError, FetchMembershipRequest, - FetchReadStateRequest, FetchReadStateResponse, MembershipResponse, MoveLeaderRequest, - MoveLeaderResponse, OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, - RecordResponse, ShutdownRequest, 
ShutdownResponse, + MembershipResponse, MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeRequest, + ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, }, }; @@ -128,15 +127,6 @@ impl ConnectApi for Reconnect { execute_with_reconnect!(self, ConnectApi::shutdown, request, timeout) } - /// Send `FetchReadStateRequest` - async fn fetch_read_state( - &self, - request: FetchReadStateRequest, - timeout: Duration, - ) -> Result, CurpError> { - execute_with_reconnect!(self, ConnectApi::fetch_read_state, request, timeout) - } - /// Send `MoveLeaderRequest` async fn move_leader( &self, diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 13d230d0b..3e5b2acab 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -26,8 +26,6 @@ use crate::rpc::AppendEntriesRequest; use crate::rpc::AppendEntriesResponse; use crate::rpc::ChangeMembershipRequest; use crate::rpc::FetchMembershipRequest; -use crate::rpc::FetchReadStateRequest; -use crate::rpc::FetchReadStateResponse; use crate::rpc::InstallSnapshotRequest; use crate::rpc::InstallSnapshotResponse; use crate::rpc::LeaseKeepAliveMsg; @@ -144,14 +142,6 @@ impl, RC: RoleChange> crate::rpc::Protocol fo )) } - #[instrument(skip_all, name = "curp_fetch_read_state")] - async fn fetch_read_state( - &self, - _request: tonic::Request, - ) -> Result, tonic::Status> { - Err(tonic::Status::unimplemented("unimplemented")) - } - #[instrument(skip_all, name = "curp_move_leader")] async fn move_leader( &self, diff --git a/crates/xline/src/server/auth_wrapper.rs b/crates/xline/src/server/auth_wrapper.rs index e826c0afa..19d6e5758 100644 --- a/crates/xline/src/server/auth_wrapper.rs +++ b/crates/xline/src/server/auth_wrapper.rs @@ -3,11 +3,10 @@ use std::sync::Arc; use curp::{ cmd::PbCodec, rpc::{ - ChangeMembershipRequest, FetchMembershipRequest, FetchReadStateRequest, - FetchReadStateResponse, LeaseKeepAliveMsg, MembershipResponse, MoveLeaderRequest, - 
MoveLeaderResponse, OpResponse, ProposeRequest, Protocol, ReadIndexRequest, - ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, - WaitLearnerRequest, WaitLearnerResponse, + ChangeMembershipRequest, FetchMembershipRequest, LeaseKeepAliveMsg, MembershipResponse, + MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeRequest, Protocol, + ReadIndexRequest, ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, + ShutdownResponse, WaitLearnerRequest, WaitLearnerResponse, }, }; use flume::r#async::RecvStream; @@ -80,13 +79,6 @@ impl Protocol for AuthWrapper { self.curp_server.shutdown(request).await } - async fn fetch_read_state( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - self.curp_server.fetch_read_state(request).await - } - async fn move_leader( &self, request: tonic::Request, From ad251a2ee121e1afa87ae9b54a1a468b566caf64 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 4 Nov 2024 18:26:41 +0800 Subject: [PATCH 305/322] test: fix tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/tests.rs | 39 ++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 108e17e93..684baac06 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -346,9 +346,9 @@ async fn test_retry_propose_return_retry_error() { }); if id == 0 { let err = early_err.clone(); - conn.expect_propose_stream() + conn.expect_shutdown() .times(5) // propose should be retried in 5 times on leader - .returning(move |_req, _token, _timeout| Err(err.clone())); + .returning(move |_req, _timeout| Err(err.clone())); } let err = early_err.clone(); @@ -364,19 +364,37 @@ async fn test_retry_propose_return_retry_error() { Fetch::new(Duration::from_secs(1), move |_| connects.clone()), ClusterState::Full(cluster_state), ); - 
let _err = retry - .propose(&TestCommand::new_put(vec![1], 1), None, false) - .await - .unwrap_err(); + // Propose shutdown is a retryable request + let _err = retry.propose_shutdown().await.unwrap_err(); } } #[traced_test] #[tokio::test] async fn test_retry_will_update_state_on_error() { - let connects = init_mocked_connects(5, |_id, conn| { + let mut return_cnt = [0; 5]; + let connects = init_mocked_connects(5, |id, conn| { conn.expect_propose_stream() - .returning(move |_req, _token, _timeout| Err(CurpError::wrong_cluster_version())); + .returning(move |_req, _token, _timeout| { + return_cnt[id] += 1; + match return_cnt[id] { + // on first propose, return an error; the client should update its state + 1 => Err(CurpError::wrong_cluster_version()), + // on second propose, return success result + 2 => { + let resp = async_stream::stream! { + yield Ok(build_propose_response(false)); + tokio::time::sleep(Duration::from_millis(100)).await; + yield Ok(build_synced_response()); + }; + Ok(tonic::Response::new(Box::new(resp))) + } + _ => unreachable!(), + } + }); + + conn.expect_record() + .return_once(move |_req, _timeout| Err(CurpError::internal("none"))); conn.expect_fetch_membership() .returning(move |_req, _timeout| { @@ -395,6 +413,11 @@ async fn test_retry_will_update_state_on_error() { .propose(&TestCommand::new_put(vec![1], 1), None, false) .await .unwrap_err(); + // on a retry the client should update the cluster state + let _result = retry + .propose(&TestCommand::new_put(vec![1], 1), None, false) + .await + .unwrap(); // The state should update to the new membership let state = retry.cluster_state().unwrap_full_state(); From d9d36b6fcd0d15d321bf2bc8701110a4a24ad5ac Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 4 Nov 2024 18:31:24 +0800 Subject: [PATCH 306/322] test: fix membership test Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/tests.rs | 3 +-- 1 file changed, 1 
insertion(+), 2 deletions(-) diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 04d72dff3..41982909a 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -673,8 +673,7 @@ fn add_exists_node_should_have_no_effect() { exists_node_id, NodeMetadata::default(), )))) - .unwrap() - .is_empty()); + .is_none()); assert!(curp .generate_membership(Some(Change::Promote(exists_node_id))) .unwrap() From f53080acd63cf8175b2d43825191ac6cad054f75 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:41:02 +0800 Subject: [PATCH 307/322] feat: add SyncSpecPool rpc definition Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/inner_message.proto | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/curp/proto/inner_message.proto b/crates/curp/proto/inner_message.proto index f8b89d680..4beae0354 100644 --- a/crates/curp/proto/inner_message.proto +++ b/crates/curp/proto/inner_message.proto @@ -54,6 +54,13 @@ message TryBecomeLeaderNowRequest {} message TryBecomeLeaderNowResponse {} +message SyncSpecPoolRequest { + uint64 term = 1; + bytes sp_data = 2; +} + +message SyncSpecPoolResponse {} + service InnerProtocol { rpc AppendEntries(AppendEntriesRequest) returns (AppendEntriesResponse); rpc Vote(VoteRequest) returns (VoteResponse); @@ -62,4 +69,5 @@ service InnerProtocol { rpc TriggerShutdown(TriggerShutdownRequest) returns (TriggerShutdownResponse); rpc TryBecomeLeaderNow(TryBecomeLeaderNowRequest) returns (TryBecomeLeaderNowResponse); + rpc SyncSpecPool(SyncSpecPoolRequest) returns (SyncSpecPoolResponse); } From 76272a91cd2b2c7cb4e1764c86002c631f57217e Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 5 Nov 2024 17:09:27 +0800 Subject: [PATCH 308/322] refactor: spec pool get all entry implementation Signed-off-by: bsbds 
<69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/conflict/spec_pool_new.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/curp/src/server/conflict/spec_pool_new.rs b/crates/curp/src/server/conflict/spec_pool_new.rs index ad9a89c75..d2156609a 100644 --- a/crates/curp/src/server/conflict/spec_pool_new.rs +++ b/crates/curp/src/server/conflict/spec_pool_new.rs @@ -59,11 +59,12 @@ impl SpeculativePool { /// Returns all entries in the pool pub(crate) fn all(&self) -> Vec> { - let mut entries = Vec::new(); - for csp in &self.command_sps { - entries.extend(csp.all().into_iter().map(Into::into)); - } - entries + self.all_ref().map(PoolEntry::clone).collect() + } + + /// Returns all entry refs in the pool + pub(crate) fn all_ref(&self) -> impl Iterator> { + self.entries.values() } /// Returns the number of entries in the pool From 454fa2b8f914abc020d9ea27ad4da1328be08b5d Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 5 Nov 2024 17:09:47 +0800 Subject: [PATCH 309/322] feat: implement spec pool sync Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/inner_message.proto | 2 +- crates/curp/src/rpc/connect/lazy.rs | 18 +++- crates/curp/src/rpc/connect/mod.rs | 28 ++++++- crates/curp/src/rpc/mod.rs | 6 +- .../curp/src/server/conflict/spec_pool_new.rs | 23 ++++- crates/curp/src/server/curp_node/mod.rs | 23 ++++- .../curp/src/server/curp_node/replication.rs | 83 ++++++++++++++++++- crates/curp/src/server/mod.rs | 12 +++ crates/curp/src/server/raw_curp/mod.rs | 6 ++ .../curp/src/server/raw_curp/replication.rs | 16 ++++ crates/utils/src/config.rs | 13 +++ 11 files changed, 216 insertions(+), 14 deletions(-) diff --git a/crates/curp/proto/inner_message.proto b/crates/curp/proto/inner_message.proto index 4beae0354..e8d249b0f 100644 --- a/crates/curp/proto/inner_message.proto +++ b/crates/curp/proto/inner_message.proto @@ -56,7 +56,7 @@ message 
TryBecomeLeaderNowResponse {} message SyncSpecPoolRequest { uint64 term = 1; - bytes sp_data = 2; + bytes ids = 2; } message SyncSpecPoolResponse {} diff --git a/crates/curp/src/rpc/connect/lazy.rs b/crates/curp/src/rpc/connect/lazy.rs index e186cd392..da48248c7 100644 --- a/crates/curp/src/rpc/connect/lazy.rs +++ b/crates/curp/src/rpc/connect/lazy.rs @@ -17,8 +17,8 @@ use crate::{ AppendEntriesRequest, AppendEntriesResponse, ChangeMembershipRequest, CurpError, FetchMembershipRequest, InstallSnapshotResponse, MembershipResponse, MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, - RecordResponse, ShutdownRequest, ShutdownResponse, VoteRequest, VoteResponse, - WaitLearnerRequest, WaitLearnerResponse, + RecordResponse, ShutdownRequest, ShutdownResponse, SyncSpecPoolRequest, + SyncSpecPoolResponse, VoteRequest, VoteResponse, WaitLearnerRequest, WaitLearnerResponse, }, snapshot::Snapshot, }; @@ -131,6 +131,20 @@ impl InnerConnectApi for ConnectLazy> { self.connect_inner(&mut inner); inner.as_ref().unwrap().try_become_leader_now(timeout).await } + + async fn sync_spec_pool( + &self, + request: SyncSpecPoolRequest, + timeout: Duration, + ) -> Result, tonic::Status> { + let mut inner = self.inner.lock().await; + self.connect_inner(&mut inner); + inner + .as_ref() + .unwrap() + .sync_spec_pool(request, timeout) + .await + } } #[allow(clippy::unwrap_used)] diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index 37e15a442..781f86af5 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -53,7 +53,8 @@ use super::{ proto::commandpb::{ReadIndexRequest, ReadIndexResponse}, reconnect::Reconnect, ChangeMembershipRequest, FetchMembershipRequest, MembershipResponse, OpResponse, RecordRequest, - RecordResponse, WaitLearnerRequest, WaitLearnerResponse, + RecordResponse, SyncSpecPoolRequest, SyncSpecPoolResponse, WaitLearnerRequest, + WaitLearnerResponse, }; /// 
Install snapshot chunk size: 64KB @@ -273,6 +274,13 @@ pub(crate) trait InnerConnectApi: Send + Sync + 'static { /// Send `TryBecomeLeaderNowRequest` async fn try_become_leader_now(&self, timeout: Duration) -> Result<(), tonic::Status>; + + /// Send `SyncSpecPoolRequest` + async fn sync_spec_pool( + &self, + request: SyncSpecPoolRequest, + timeout: Duration, + ) -> Result, tonic::Status>; } /// Inner Connect Api Wrapper @@ -616,6 +624,24 @@ impl InnerConnectApi for Connect> { result.map(|_| ()) } + + async fn sync_spec_pool( + &self, + request: SyncSpecPoolRequest, + timeout: Duration, + ) -> Result, tonic::Status> { + #[cfg(feature = "client-metrics")] + let start_at = self.before_rpc::(); + + let mut client = self.rpc_connect.clone(); + let req = tonic::Request::new(request); + let result = with_timeout!(timeout, client.sync_spec_pool(req)); + + #[cfg(feature = "client-metrics")] + self.after_rpc(start_at, &result); + + result + } } /// A connect api implementation which bypass kernel to dispatch method diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 76ac2b021..c48f228c2 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -11,9 +11,9 @@ pub(crate) use self::proto::{ commandpb::CurpError as CurpErrorWrapper, inner_messagepb::{ inner_protocol_server::InnerProtocol, AppendEntriesRequest, AppendEntriesResponse, - InstallSnapshotRequest, InstallSnapshotResponse, TriggerShutdownRequest, - TriggerShutdownResponse, TryBecomeLeaderNowRequest, TryBecomeLeaderNowResponse, - VoteRequest, VoteResponse, + InstallSnapshotRequest, InstallSnapshotResponse, SyncSpecPoolRequest, SyncSpecPoolResponse, + TriggerShutdownRequest, TriggerShutdownResponse, TryBecomeLeaderNowRequest, + TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, }, }; pub use self::proto::{ diff --git a/crates/curp/src/server/conflict/spec_pool_new.rs b/crates/curp/src/server/conflict/spec_pool_new.rs index d2156609a..7e7823da1 100644 --- 
a/crates/curp/src/server/conflict/spec_pool_new.rs +++ b/crates/curp/src/server/conflict/spec_pool_new.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use curp_external_api::conflict::SpeculativePoolOp; @@ -47,7 +47,6 @@ impl SpeculativePool { let _ignore = self.entries.remove(&entry.id); } - #[allow(unused)] /// Removes an entry from the pool by it's propose id pub(crate) fn remove_by_id(&mut self, id: &ProposeId) { if let Some(entry) = self.entries.remove(id) { @@ -67,6 +66,11 @@ impl SpeculativePool { self.entries.values() } + /// Returns all entry refs in the pool + pub(crate) fn all_ids(&self) -> impl Iterator { + self.entries.keys() + } + /// Returns the number of entries in the pool #[allow(clippy::arithmetic_side_effects)] // Pool sizes can't overflow a `usize` pub(crate) fn len(&self) -> usize { @@ -74,4 +78,19 @@ impl SpeculativePool { .iter() .fold(0, |sum, pool| sum + pool.len()) } + + /// Performs garbage collection on the spec pool with given entries from the leader + /// + /// Removes entries from the pool that are not present in the provided `leader_entries` + pub(crate) fn gc(&mut self, leader_entry_ids: &HashSet) { + let to_remove: Vec<_> = self + .entries + .keys() + .filter(|id| !leader_entry_ids.contains(id)) + .copied() + .collect(); + for id in to_remove { + self.remove_by_id(&id); + } + } } diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index fda111773..234c4cb09 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -48,8 +48,9 @@ use crate::{ InstallSnapshotRequest, InstallSnapshotResponse, MembershipResponse, MoveLeaderRequest, MoveLeaderResponse, PoolEntry, ProposeId, ProposeRequest, ProposeResponse, ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, - SyncedResponse, TriggerShutdownRequest, TriggerShutdownResponse, TryBecomeLeaderNowRequest, - 
TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, + SyncSpecPoolRequest, SyncSpecPoolResponse, SyncedResponse, TriggerShutdownRequest, + TriggerShutdownResponse, TryBecomeLeaderNowRequest, TryBecomeLeaderNowResponse, + VoteRequest, VoteResponse, }, server::{ cmd_worker::{after_sync, worker_reset, worker_snapshot}, @@ -582,6 +583,24 @@ impl, RC: RoleChange> CurpNode { } Ok(TryBecomeLeaderNowResponse::default()) } + + /// Handle `SyncSpecPool` request + pub(super) fn sync_spec_pool( + &self, + req: &SyncSpecPoolRequest, + ) -> Result { + let current_term = self.curp.term(); + if req.term < current_term { + return Err(CurpError::internal(format!( + "invalid leader term, current: {current_term}" + ))); + } + let entries: Vec = bincode::deserialize(&req.ids) + .unwrap_or_else(|_| unreachable!("failed to deserialize spec pool data")); + self.curp.gc_spec_pool(&entries.into_iter().collect()); + + Ok(SyncSpecPoolResponse::default()) + } } /// Spawned tasks diff --git a/crates/curp/src/server/curp_node/replication.rs b/crates/curp/src/server/curp_node/replication.rs index 2dac639c7..62947a780 100644 --- a/crates/curp/src/server/curp_node/replication.rs +++ b/crates/curp/src/server/curp_node/replication.rs @@ -12,7 +12,10 @@ use tracing::{debug, error, info, warn}; use utils::config::CurpConfig; use crate::{ - rpc::{connect::InnerConnectApiWrapper, AppendEntriesResponse, InstallSnapshotResponse}, + rpc::{ + connect::InnerConnectApiWrapper, AppendEntriesResponse, InstallSnapshotResponse, + SyncSpecPoolRequest, + }, server::{ metrics, raw_curp::{ @@ -63,7 +66,7 @@ impl, RC: RoleChange> CurpNode { Self::heartbeat_worker(action_tx.clone(), connects, cfg.clone(), self_id, self_term) .map(|result| info!("heartbeat worker exit, result: {result:?}")), ); - let replication_handles = node_states.into_iter().map(|(id, state)| { + let replication_handles = node_states.clone().into_iter().map(|(id, state)| { let cfg = cfg.clone(); info!("spawning replication task for {id}"); 
tokio::spawn(Self::replication_worker( @@ -75,9 +78,22 @@ impl, RC: RoleChange> CurpNode { cfg, )) }); + let spec_pool_sync_handles = node_states.into_iter().map(|(id, state)| { + let cfg = cfg.clone(); + info!("spawning sync spec pool task for {id}"); + tokio::spawn(Self::spec_pool_sync_worker( + id, + state, + action_tx.clone(), + self_id, + self_term, + cfg, + )) + }); *HANDLES.lock() = replication_handles .chain([state_handle]) .chain([heartbeat_handle]) + .chain(spec_pool_sync_handles) .collect(); } @@ -216,7 +232,10 @@ impl, RC: RoleChange> CurpNode { match action { Action::UpdateMatchIndex((_, index)) => next_index = index + 1, Action::UpdateNextIndex((_, index)) => next_index = index, - Action::GetLogFrom(_) | Action::StepDown(_) | Action::GetCommitIndex(_) => {} + Action::GetLogFrom(_) + | Action::StepDown(_) + | Action::GetCommitIndex(_) + | Action::GetSpecPoolEntryIds(_) => {} } let __ignore = action_tx.send(action); } @@ -303,4 +322,62 @@ impl, RC: RoleChange> CurpNode { .map_err(|err| warn!("snapshot to {node_id} failed, {err:?}")) .ok() } + + /// A worker responsible for sync speculative pool to followers in the cluster + async fn spec_pool_sync_worker( + node_id: u64, + node_state: NodeState, + action_tx: flume::Sender>, + self_id: u64, + self_term: u64, + cfg: CurpConfig, + ) { + let rpc_timeout = cfg.rpc_timeout; + let sync_interval = cfg.spec_pool_sync_interval; + let connect = node_state.connect(); + + loop { + tokio::time::sleep(sync_interval).await; + let (tx, rx) = oneshot::channel(); + if action_tx.send(Action::GetSpecPoolEntryIds(tx)).is_err() { + debug!( + "action_rx closed because the leader stepped down, exiting spec pool sync worker" + ); + break; + } + let entries = match rx.await { + Ok(x) => x, + Err(err) => { + error!("channel unexpectedly closed: {err}"); + return; + } + }; + let ids = bincode::serialize(&entries) + .unwrap_or_else(|err| unreachable!("serialize failed: {err}")); + Self::send_sync_spec_pool(connect, rpc_timeout, 
node_id, self_id, self_term, ids).await; + } + } + + /// Send `sync_spec_pool` request + async fn send_sync_spec_pool( + connect: &InnerConnectApiWrapper, + timeout: Duration, + node_id: u64, + self_id: u64, + self_term: u64, + ids_serialized: Vec, + ) { + debug!("{self_id} send append_entries to {node_id}"); + + let _ignore = connect + .sync_spec_pool( + SyncSpecPoolRequest { + term: self_term, + ids: ids_serialized, + }, + timeout, + ) + .await + .map_err(|err| warn!("sync spec pool to {node_id} failed, {err:?}")); + } } diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 3e5b2acab..59759da27 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -40,6 +40,8 @@ use crate::rpc::RecordRequest; use crate::rpc::RecordResponse; use crate::rpc::ShutdownRequest; use crate::rpc::ShutdownResponse; +use crate::rpc::SyncSpecPoolRequest; +use crate::rpc::SyncSpecPoolResponse; use crate::rpc::TriggerShutdownRequest; use crate::rpc::TriggerShutdownResponse; use crate::rpc::TryBecomeLeaderNowRequest; @@ -253,6 +255,16 @@ impl, RC: RoleChange> crate::rpc::InnerProtoc self.inner.try_become_leader_now(request.get_ref()).await?, )) } + + #[instrument(skip_all, name = "curp_sync_spec_pool")] + async fn sync_spec_pool( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + Ok(tonic::Response::new( + self.inner.sync_spec_pool(request.get_ref())?, + )) + } } /// Used for bypassed connect because the `Protocol` trait requires `tonic::Streaming` diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 66951f511..1096978c7 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -15,6 +15,7 @@ use std::cmp::min; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; +use std::collections::HashSet; use std::fmt::Debug; use std::iter; use std::sync::atomic::AtomicU8; @@ -1348,6 +1349,11 @@ impl 
RawCurp { self.ctx.node_states.all_states() } + /// Performs garbage collection on the spec pool with given entries from the leader + pub(super) fn gc_spec_pool(&self, leader_entry_ids: &HashSet) { + self.ctx.spec_pool.lock().gc(leader_entry_ids); + } + #[cfg(test)] /// Get a range of log entry pub(crate) fn get_log_from(&self, idx: u64) -> Vec>> { diff --git a/crates/curp/src/server/raw_curp/replication.rs b/crates/curp/src/server/raw_curp/replication.rs index 7bd0e1992..57e36cd45 100644 --- a/crates/curp/src/server/raw_curp/replication.rs +++ b/crates/curp/src/server/raw_curp/replication.rs @@ -1,7 +1,11 @@ +use std::collections::HashSet; + use curp_external_api::{cmd::Command, role_change::RoleChange, LogIndex}; use tokio::sync::oneshot; use tracing::{debug, error, info}; +use crate::rpc::ProposeId; + use super::{AppendEntries, RawCurp, SyncAction}; /// Represents various actions that can be performed on the `RawCurp` state machine @@ -25,6 +29,10 @@ pub(crate) enum Action { /// Step down the current node. /// Contains the latest term. StepDown(u64), + + /// Request to get all speculative pool entries + /// Contains a sender to send the entries. 
+ GetSpecPoolEntryIds(oneshot::Sender>), } impl RawCurp { @@ -60,6 +68,14 @@ impl RawCurp { info!("received greater term: {node_term}, stepping down."); self.step_down(node_term); } + Action::GetSpecPoolEntryIds(tx) => { + if tx + .send(self.spec_pool().lock().all_ids().copied().collect()) + .is_err() + { + error!("send spec pool entries failed"); + } + } } } diff --git a/crates/utils/src/config.rs b/crates/utils/src/config.rs index 57f0880d1..5e8ff58ec 100644 --- a/crates/utils/src/config.rs +++ b/crates/utils/src/config.rs @@ -387,6 +387,11 @@ pub struct CurpConfig { #[builder(default = "default_log_entries_cap()")] #[serde(default = "default_log_entries_cap")] pub log_entries_cap: usize, + + /// Speculative Pool Sync Interval + #[builder(default = "default_spec_pool_sync_interval()")] + #[serde(with = "duration_format", default = "default_spec_pool_sync_interval")] + pub spec_pool_sync_interval: Duration, } /// default heartbeat interval @@ -547,6 +552,13 @@ pub const fn default_watch_progress_notify_interval() -> Duration { Duration::from_secs(600) } +/// default speculative pool sync interval +#[must_use] +#[inline] +pub const fn default_spec_pool_sync_interval() -> Duration { + Duration::from_secs(1800) +} + impl Default for CurpConfig { #[inline] fn default() -> Self { @@ -563,6 +575,7 @@ impl Default for CurpConfig { cmd_workers: default_cmd_workers(), gc_interval: default_gc_interval(), log_entries_cap: default_log_entries_cap(), + spec_pool_sync_interval: default_spec_pool_sync_interval(), } } } From 784c5065c6e38258af3b87364495b20990ce0861 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Wed, 6 Nov 2024 10:36:27 +0800 Subject: [PATCH 310/322] test: add speculative pool gc tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/tests/it/common/curp_group.rs | 13 ++ crates/curp/tests/it/server.rs | 137 +++++++++++++++++++++- 2 files changed, 149 insertions(+), 1 deletion(-) diff --git 
a/crates/curp/tests/it/common/curp_group.rs b/crates/curp/tests/it/common/curp_group.rs index 5cad34648..0eb3148bc 100644 --- a/crates/curp/tests/it/common/curp_group.rs +++ b/crates/curp/tests/it/common/curp_group.rs @@ -108,6 +108,19 @@ impl CurpGroup { inner } + pub async fn new_with_custom_sp_sync_interval(n_nodes: usize, interval: Duration) -> Self { + let config = Arc::new( + CurpConfigBuilder::default() + .spec_pool_sync_interval(interval) + .build() + .unwrap(), + ); + let configs = (0..n_nodes) + .map(|i| (format!("S{i}"), (Arc::clone(&config), Default::default()))) + .collect(); + Self::new_with_configs(configs, "S0".to_owned()).await + } + async fn new_with_configs( configs: BTreeMap, EngineConfig)>, leader_name: String, diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index f20381146..fc6a02ab5 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -20,10 +20,14 @@ use madsim::rand::{thread_rng, Rng}; use test_macros::abort_on_panic; use tokio::net::TcpListener; use tokio_stream::StreamExt; +use tonic::transport::Channel; use tracing_test::traced_test; use utils::config::ClientConfig; -use crate::common::curp_group::{CurpGroup, DEFAULT_SHUTDOWN_TIMEOUT}; +use crate::common::curp_group::{ + commandpb::{ProposeId, RecordRequest}, + CurpGroup, ProposeRequest, ProtocolClient, DEFAULT_SHUTDOWN_TIMEOUT, +}; #[tokio::test(flavor = "multi_thread")] #[abort_on_panic] @@ -865,3 +869,134 @@ fn assert_membership_response( let expect_node_ids: BTreeSet<_> = expect_node_ids.into_iter().collect(); assert_eq!(node_ids, expect_node_ids); } + +async fn record_to_node( + connect: &mut ProtocolClient, + propose_id: ProposeId, + command: Vec, +) -> bool { + connect + .record(tonic::Request::new(RecordRequest { + propose_id: Some(propose_id), + command, + })) + .await + .unwrap() + .into_inner() + .conflict +} + +#[tokio::test(flavor = "multi_thread")] +async fn curp_server_spec_pool_gc_ok() { + init_logger(); + // 
sets the initail sync interval to a relatively long duration + let group = CurpGroup::new_with_custom_sp_sync_interval(5, Duration::from_secs(1)).await; + let client = group.new_client().await; + + let leader = client.fetch_leader_id(true).await.unwrap(); + let follower_id = group.nodes.keys().find(|&id| &leader != id).unwrap(); + let mut follower_connect = group.get_connect(follower_id).await; + let cmd0 = bincode::serialize(&TestCommand::new_put(vec![0], 0)).unwrap(); + + // record a command to a follower node + let conflict = record_to_node( + &mut follower_connect, + ProposeId { + client_id: 1, + seq_num: 0, + }, + cmd0.clone(), + ) + .await; + assert!(!conflict); + + // on second record, it should return conflict + let conflict = record_to_node( + &mut follower_connect, + ProposeId { + client_id: 2, + seq_num: 0, + }, + cmd0.clone(), + ) + .await; + assert!(conflict); + + // wait for the sync to complete + tokio::time::sleep(Duration::from_secs(2)).await; + + // the follower should have removed the outdated entry from sp, and returns no conflict. 
+ let conflict = record_to_node( + &mut follower_connect, + ProposeId { + client_id: 3, + seq_num: 0, + }, + cmd0.clone(), + ) + .await; + assert!(!conflict); +} + +#[tokio::test(flavor = "multi_thread")] +async fn curp_server_spec_pool_gc_should_not_remove_leader_entry() { + init_logger(); + // sets the initail sync interval to a relatively long duration + let group = CurpGroup::new_with_custom_sp_sync_interval(5, Duration::from_secs(1)).await; + let client = group.new_client().await; + + let leader = client.fetch_leader_id(true).await.unwrap(); + let follower_id = group.nodes.keys().find(|&id| &leader != id).unwrap(); + println!("leader: {leader}"); + let mut leader_connect = group.get_connect(&leader).await; + let mut follower_connect = group.get_connect(follower_id).await; + let cmd = bincode::serialize(&TestCommand::new_put(vec![0], 0)).unwrap(); + + // record a command to a follower node and leader node + let conflict = record_to_node( + &mut follower_connect, + ProposeId { + client_id: 1, + seq_num: 0, + }, + cmd.clone(), + ) + .await; + assert!(!conflict); + record_to_node( + &mut leader_connect, + ProposeId { + client_id: 1, + seq_num: 0, + }, + cmd.clone(), + ) + .await; + + // on second record, it should return conflict + let conflict = record_to_node( + &mut follower_connect, + ProposeId { + client_id: 2, + seq_num: 0, + }, + cmd.clone(), + ) + .await; + assert!(conflict); + + // wait for the sync to complete + tokio::time::sleep(Duration::from_secs(2)).await; + + // the follower should not remove the entry, and returns conflict + let conflict = record_to_node( + &mut follower_connect, + ProposeId { + client_id: 3, + seq_num: 0, + }, + cmd.clone(), + ) + .await; + assert!(conflict); +} From 9d1b9fd251f8a662e8c00bcebe5bbbdd1478cae3 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 7 Nov 2024 09:32:03 +0800 Subject: [PATCH 311/322] chore: update curp proto Signed-off-by: bsbds 
<69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 5f29de820..3d1387f92 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 5f29de8204724bf760b9efe9f87d9c095601f8b1 +Subproject commit 3d1387f926ec990ba17feb27317baae63e50f1fd From 9865e068d4f8bd7413f8808fb4a574797f19549c Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Thu, 7 Nov 2024 21:32:18 +0800 Subject: [PATCH 312/322] refactor: use sha256 for membership version generation Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- Cargo.lock | 5 +++-- crates/curp/Cargo.toml | 1 + crates/curp/proto/common | 2 +- crates/curp/src/client/cluster_state.rs | 2 +- crates/curp/src/client/unary/mod.rs | 4 ++-- crates/curp/src/member.rs | 14 ++++++++------ crates/curp/src/rpc/mod.rs | 8 +++----- crates/curp/src/server/curp_node/member_impl.rs | 2 +- crates/curp/src/server/curp_node/mod.rs | 2 +- crates/curp/src/server/raw_curp/mod.rs | 2 +- crates/curp/tests/it/server.rs | 2 +- 11 files changed, 23 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba7660316..204fc8601 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -677,6 +677,7 @@ dependencies = [ "prost-build", "rand", "serde", + "serde_json", "sha2", "tempfile", "test-macros", @@ -2573,9 +2574,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.125" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ "itoa", "memchr", diff --git a/crates/curp/Cargo.toml b/crates/curp/Cargo.toml index 0da671f5f..324222b7a 100644 --- a/crates/curp/Cargo.toml +++ b/crates/curp/Cargo.toml @@ -36,6 +36,7 @@ 
priority-queue = "2.0.2" prost = "0.13" rand = "0.8.5" serde = { version = "1.0.204", features = ["derive", "rc"] } +serde_json = "1.0.132" sha2 = "0.10.8" thiserror = "1.0.61" tokio = { version = "0.2.25", package = "madsim-tokio", features = [ diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 3d1387f92..247ae120b 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 3d1387f926ec990ba17feb27317baae63e50f1fd +Subproject commit 247ae120bc6e5ccabe3e5a6dcebd80fa078b2c86 diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index f012e3e72..e7e379c07 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -251,7 +251,7 @@ impl ClusterStateFull { /// Calculates the cluster version /// /// The cluster version is a hash of the current `Membership` - pub(crate) fn cluster_version(&self) -> u64 { + pub(crate) fn cluster_version(&self) -> Vec { self.membership.version() } diff --git a/crates/curp/src/client/unary/mod.rs b/crates/curp/src/client/unary/mod.rs index 90e092c25..ee740b430 100644 --- a/crates/curp/src/client/unary/mod.rs +++ b/crates/curp/src/client/unary/mod.rs @@ -47,7 +47,7 @@ impl RepeatableClientApi for Unary { /// Send propose to shutdown cluster async fn propose_shutdown(&self, ctx: Context) -> Result<(), Self::Error> { - let req = ShutdownRequest::new(ctx.propose_id(), 0); + let req = ShutdownRequest::new(ctx.propose_id(), ctx.cluster_state().cluster_version()); let timeout = self.config.wait_synced_timeout(); let _resp = ctx .cluster_state() @@ -59,7 +59,7 @@ impl RepeatableClientApi for Unary { /// Send move leader request async fn move_leader(&self, node_id: u64, ctx: Context) -> Result<(), Self::Error> { - let req = MoveLeaderRequest::new(node_id, 0); + let req = MoveLeaderRequest::new(node_id, ctx.cluster_state().cluster_version()); let timeout = self.config.wait_synced_timeout(); let _resp = ctx 
.cluster_state() diff --git a/crates/curp/src/member.rs b/crates/curp/src/member.rs index 4b8cf3e2d..ea4a55abc 100644 --- a/crates/curp/src/member.rs +++ b/crates/curp/src/member.rs @@ -8,6 +8,7 @@ use std::iter; use curp_external_api::LogIndex; use serde::Deserialize; use serde::Serialize; +use sha2::{Digest, Sha256}; use crate::quorum::Joint; use crate::quorum::QuorumSet; @@ -217,7 +218,7 @@ impl MembershipState { /// Calculates the cluster version /// /// The cluster version is a hash of the effective `Membership` - pub(crate) fn cluster_version(&self) -> u64 { + pub(crate) fn cluster_version(&self) -> Vec { self.effective().version() } @@ -349,11 +350,12 @@ impl Membership { } /// Calculates the version of this membership - pub(crate) fn version(&self) -> u64 { - // TODO: handle conflict? - let mut hasher = DefaultHasher::new(); - self.hash(&mut hasher); - hasher.finish() + pub(crate) fn version(&self) -> Vec { + let mut hasher = Sha256::new(); + let data = serde_json::to_vec(self) + .unwrap_or_else(|_| unreachable!("failed to serialize membership")); + hasher.update(data); + hasher.finalize().to_vec() } } diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index c48f228c2..8d24d2e98 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -50,8 +50,6 @@ pub use self::proto::{ SyncedResponse, WaitLearnerRequest, WaitLearnerResponse, - WaitSyncedRequest, - WaitSyncedResponse, }, inner_messagepb::inner_protocol_server::InnerProtocolServer, }; @@ -137,7 +135,7 @@ impl ProposeRequest { pub fn new( propose_id: ProposeId, cmd: &C, - cluster_version: u64, + cluster_version: Vec, term: u64, slow_path: bool, ) -> Self { @@ -370,7 +368,7 @@ impl InstallSnapshotResponse { impl ShutdownRequest { /// Create a new shutdown request - pub(crate) fn new(id: ProposeId, cluster_version: u64) -> Self { + pub(crate) fn new(id: ProposeId, cluster_version: Vec) -> Self { Self { propose_id: Some(id.into()), cluster_version, @@ -389,7 +387,7 @@ impl 
ShutdownRequest { impl MoveLeaderRequest { /// Create a new `MoveLeaderRequest` - pub(crate) fn new(node_id: ServerId, cluster_version: u64) -> Self { + pub(crate) fn new(node_id: ServerId, cluster_version: Vec) -> Self { Self { node_id, cluster_version, diff --git a/crates/curp/src/server/curp_node/member_impl.rs b/crates/curp/src/server/curp_node/member_impl.rs index 857cdf4bd..ccac31b96 100644 --- a/crates/curp/src/server/curp_node/member_impl.rs +++ b/crates/curp/src/server/curp_node/member_impl.rs @@ -40,7 +40,7 @@ impl, RC: RoleChange> CurpNode { &self, request: ChangeMembershipRequest, ) -> Result { - self.curp.check_cluster_version(request.cluster_version)?; + self.curp.check_cluster_version(&request.cluster_version)?; let changes = request .changes diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 234c4cb09..e24910c14 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -151,7 +151,7 @@ impl, RC: RoleChange> CurpNode { } self.curp.check_leader_transfer()?; self.curp.check_term(req.term)?; - self.curp.check_cluster_version(req.cluster_version)?; + self.curp.check_cluster_version(&req.cluster_version)?; if req.slow_path { resp_tx.set_conflict(true); diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 1096978c7..b1ee3f5c0 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -1131,7 +1131,7 @@ impl RawCurp { /// Returns `CurpError::WrongClusterVersion` if the give cluster version does not match the /// effective membership version of the current node. 
- pub(super) fn check_cluster_version(&self, cluster_version: u64) -> Result<(), CurpError> { + pub(super) fn check_cluster_version(&self, cluster_version: &[u8]) -> Result<(), CurpError> { if self.ms.read().cluster().cluster_version() == cluster_version { return Ok(()); } diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index fc6a02ab5..47833362d 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -26,7 +26,7 @@ use utils::config::ClientConfig; use crate::common::curp_group::{ commandpb::{ProposeId, RecordRequest}, - CurpGroup, ProposeRequest, ProtocolClient, DEFAULT_SHUTDOWN_TIMEOUT, + CurpGroup, ProtocolClient, DEFAULT_SHUTDOWN_TIMEOUT, }; #[tokio::test(flavor = "multi_thread")] From e7a035d77f855d3b4cd54e6856bac86814e86816 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Sun, 10 Nov 2024 21:58:18 +0800 Subject: [PATCH 313/322] chore: refine retry code Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/retry.rs | 67 +++++++++++++++++---------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/crates/curp/src/client/retry.rs b/crates/curp/src/client/retry.rs index 7f317ed08..33698d381 100644 --- a/crates/curp/src/client/retry.rs +++ b/crates/curp/src/client/retry.rs @@ -237,28 +237,35 @@ impl Retry { Ok(Context::new(propose_id, cluster_state)) } - /// Updates the cluster state when error occurs. - fn update_cluster_state_on_error(&self, err: &CurpError) { - match *err { - // Some error that needs to update cluster state - CurpError::RpcTransport(()) - | CurpError::WrongClusterVersion(()) - | CurpError::Redirect(_) // FIXME: The redirect error needs to include full cluster state - | CurpError::Zombie(()) => { - self.cluster_state.errored(); + /// Execute a future and update cluster state if an error is returned. 
+ async fn with_error_handling(&self, fut: Fut) -> Result + where + Fut: Future>, + { + let result = fut.await; + if let Err(ref err) = result { + match *err { + // Some error that needs to update cluster state + CurpError::RpcTransport(()) + | CurpError::WrongClusterVersion(()) + | CurpError::Redirect(_) // FIXME: The redirect error needs to include full cluster state + | CurpError::Zombie(()) => { + self.cluster_state.errored(); + } + CurpError::KeyConflict(()) + | CurpError::Duplicated(()) + | CurpError::ExpiredClientId(()) + | CurpError::InvalidConfig(()) + | CurpError::NodeNotExists(()) + | CurpError::NodeAlreadyExists(()) + | CurpError::LearnerNotCatchUp(()) + | CurpError::ShuttingDown(()) + | CurpError::Internal(_) + | CurpError::LeaderTransfer(_) + | CurpError::InvalidMemberChange(()) => {} } - CurpError::KeyConflict(()) - | CurpError::Duplicated(()) - | CurpError::ExpiredClientId(()) - | CurpError::InvalidConfig(()) - | CurpError::NodeNotExists(()) - | CurpError::NodeAlreadyExists(()) - | CurpError::LearnerNotCatchUp(()) - | CurpError::ShuttingDown(()) - | CurpError::Internal(_) - | CurpError::LeaderTransfer(_) - | CurpError::InvalidMemberChange(()) => {}, } + result } } @@ -318,15 +325,15 @@ where let mut backoff = self.retry_config.init_backoff(); let mut last_err = None; while let Some(delay) = backoff.next_delay() { - let context = match self.get_context().await { + let context = match self.with_error_handling(self.get_context()).await { Ok(x) => x, Err(err) => { + // TODO: refactor on_error like with_error_handling self.on_error(err, delay, &mut last_err).await?; continue; } }; - let result = f(&self.inner, context).await; - match result { + match f(&self.inner, context).await { Ok(res) => return Ok(res), Err(err) => self.on_error(err, delay, &mut last_err).await?, }; @@ -346,7 +353,6 @@ where last_err: &mut Option, ) -> Result<(), tonic::Status> { Self::early_return(&err)?; - self.update_cluster_state_on_error(&err); #[cfg(feature = 
"client-metrics")] super::metrics::get().client_retry_count.add(1, &[]); @@ -405,15 +411,10 @@ where where F: Future>, { - let ctx = self.get_context().await.map_err(|err| { - self.update_cluster_state_on_error(&err); - err - })?; - - f(&self.inner, ctx).await.map_err(|err| { - self.update_cluster_state_on_error(&err); - err.into() - }) + let ctx = self.with_error_handling(self.get_context()).await?; + self.with_error_handling(f(&self.inner, ctx)) + .await + .map_err(Into::into) } } From 349b81e3dde6f3fdd4b8196679777da730109d0b Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Mon, 11 Nov 2024 19:40:32 +0800 Subject: [PATCH 314/322] refactor: implement speculative pool gc Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/common | 2 +- crates/curp/src/client/cluster_state.rs | 97 ++++++++++++++++--- crates/curp/src/client/tests.rs | 31 ++++-- crates/curp/src/client/unary/propose_impl.rs | 57 ++++++++--- crates/curp/src/log_entry/entry_data.rs | 9 ++ crates/curp/src/response.rs | 3 +- crates/curp/src/rpc/mod.rs | 13 ++- crates/curp/src/server/cmd_worker/mod.rs | 13 ++- .../curp/src/server/conflict/spec_pool_new.rs | 43 +++++++- crates/curp/src/server/conflict/tests.rs | 4 +- crates/curp/src/server/curp_node/mod.rs | 36 ++++--- .../curp/src/server/curp_node/replication.rs | 71 ++------------ crates/curp/src/server/raw_curp/mod.rs | 29 +++--- .../curp/src/server/raw_curp/replication.rs | 23 ++--- crates/curp/src/server/raw_curp/tests.rs | 7 +- crates/curp/src/server/storage/db.rs | 18 +++- crates/curp/src/server/storage/mod.rs | 4 +- 17 files changed, 316 insertions(+), 144 deletions(-) diff --git a/crates/curp/proto/common b/crates/curp/proto/common index 247ae120b..403f9f428 160000 --- a/crates/curp/proto/common +++ b/crates/curp/proto/common @@ -1 +1 @@ -Subproject commit 247ae120bc6e5ccabe3e5a6dcebd80fa078b2c86 +Subproject commit 403f9f428b0b8317591792d40b3eca2f3a580388 diff --git 
a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index e7e379c07..e51f46622 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -1,5 +1,5 @@ use std::{ - collections::{hash_map::DefaultHasher, HashMap}, + collections::{hash_map::DefaultHasher, HashMap, HashSet}, hash::{Hash, Hasher}, sync::Arc, }; @@ -222,6 +222,51 @@ impl ClusterStateFull { false } + /// Execute an operation on each follower, until a quorum is reached. + pub(crate) async fn for_each_follower_with_expect< + Fut: Future, + R, + B, + T, + FilterMap, + Folder, + Expect, + >( + self, + mut f: impl FnMut(Arc) -> Fut, + mut filter: FilterMap, + mut folder: Folder, + mut b: B, + mut expect: Expect, + ) -> Option + where + FilterMap: FnMut(R) -> Option, + Folder: FnMut((&mut Vec, B), (u64, T)) -> B, + Expect: FnMut(&dyn QuorumSet>, Vec) -> bool, + { + let qs = self.membership.as_joint(); + let leader_id = self.leader_id(); + + #[allow(clippy::pattern_type_mismatch)] + let stream: FuturesUnordered<_> = self + .member_connects() + .filter(|(id, _)| *id != leader_id) + .map(|(id, conn)| f(Arc::clone(conn)).map(move |r| (id, r))) + .collect(); + let mut filtered = + stream.filter_map(|(id, r)| futures::future::ready(filter(r).map(|t| (id, t)))); + + let mut ids = vec![]; + while let Some(x) = filtered.next().await { + b = folder((&mut ids, b), x); + if expect(&qs, ids.clone().into_iter().chain([leader_id]).collect()) { + return Some(b); + } + } + + None + } + /// Gets member connects fn member_connects(&self) -> impl Iterator)> { self.membership @@ -283,12 +328,18 @@ mod test { match id { 0 => { conn.expect_record().returning(|_req, _timeout| { - Ok(Response::new(RecordResponse { conflict: true })) + Ok(Response::new(RecordResponse { + conflict: true, + sp_version: 0, + })) }); } 1 | 2 | 3 | 4 => { conn.expect_record().returning(|_req, _timeout| { - Ok(Response::new(RecordResponse { conflict: false })) + 
Ok(Response::new(RecordResponse { + conflict: false, + sp_version: 0, + })) }); } _ => unreachable!("there are only 5 nodes"), @@ -314,12 +365,18 @@ mod test { match id { 2 => { conn.expect_record().returning(|_req, _timeout| { - Ok(Response::new(RecordResponse { conflict: true })) + Ok(Response::new(RecordResponse { + conflict: true, + sp_version: 0, + })) }); } 0 | 1 | 3 | 4 => { conn.expect_record().returning(|_req, _timeout| { - Ok(Response::new(RecordResponse { conflict: false })) + Ok(Response::new(RecordResponse { + conflict: false, + sp_version: 0, + })) }); } _ => unreachable!("there are only 5 nodes"), @@ -348,12 +405,18 @@ mod test { match id { 0 => { conn.expect_record().returning(|_req, _timeout| { - Ok(Response::new(RecordResponse { conflict: true })) + Ok(Response::new(RecordResponse { + conflict: true, + sp_version: 0, + })) }); } 1 | 2 | 3 | 4 => { conn.expect_record().returning(|_req, _timeout| { - Ok(Response::new(RecordResponse { conflict: false })) + Ok(Response::new(RecordResponse { + conflict: false, + sp_version: 0, + })) }); } _ => unreachable!("there are only 5 nodes"), @@ -386,12 +449,18 @@ mod test { match id { 0 | 1 => { conn.expect_record().returning(|_req, _timeout| { - Ok(Response::new(RecordResponse { conflict: false })) + Ok(Response::new(RecordResponse { + conflict: false, + sp_version: 0, + })) }); } 2 | 3 | 4 => { conn.expect_record().returning(|_req, _timeout| { - Ok(Response::new(RecordResponse { conflict: true })) + Ok(Response::new(RecordResponse { + conflict: true, + sp_version: 0, + })) }); } _ => unreachable!("there are only 5 nodes"), @@ -423,7 +492,10 @@ mod test { match id { 0 | 1 | 2 | 3 | 4 => { conn.expect_record().returning(|_req, _timeout| { - Ok(Response::new(RecordResponse { conflict: false })) + Ok(Response::new(RecordResponse { + conflict: false, + sp_version: 0, + })) }); } _ => unreachable!("there are only 5 nodes"), @@ -448,7 +520,10 @@ mod test { match id { 0 | 1 | 2 | 3 | 4 => { 
conn.expect_record().returning(|_req, _timeout| { - Ok(Response::new(RecordResponse { conflict: false })) + Ok(Response::new(RecordResponse { + conflict: false, + sp_version: 0, + })) }); } _ => unreachable!("there are only 5 nodes"), diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 684baac06..6cb792428 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -69,6 +69,7 @@ fn build_propose_response(conflict: bool) -> OpResponse { let resp = ResponseOp::Propose(ProposeResponse::new_result::( &Ok(TestCommandResult::default()), conflict, + 0, )); OpResponse { op: Some(resp) } } @@ -139,8 +140,14 @@ async fn test_unary_propose_fast_path_works() { conn.expect_record().return_once(move |_req, _timeout| { let resp = match id { 0 => unreachable!("leader should not receive record request"), - 1 | 2 | 3 => RecordResponse { conflict: false }, - 4 => RecordResponse { conflict: true }, + 1 | 2 | 3 => RecordResponse { + conflict: false, + sp_version: 0, + }, + 4 => RecordResponse { + conflict: true, + sp_version: 0, + }, _ => unreachable!("there are only 5 nodes"), }; Ok(tonic::Response::new(resp)) @@ -174,8 +181,14 @@ async fn test_unary_propose_slow_path_works() { conn.expect_record().return_once(move |_req, _timeout| { let resp = match id { 0 => unreachable!("leader should not receive record request"), - 1 | 2 | 3 => RecordResponse { conflict: false }, - 4 => RecordResponse { conflict: true }, + 1 | 2 | 3 => RecordResponse { + conflict: false, + sp_version: 0, + }, + 4 => RecordResponse { + conflict: true, + sp_version: 0, + }, _ => unreachable!("there are only 5 nodes"), }; Ok(tonic::Response::new(resp)) @@ -220,8 +233,14 @@ async fn test_unary_propose_fast_path_fallback_slow_path() { conn.expect_record().return_once(move |_req, _timeout| { let resp = match id { 0 => unreachable!("leader should not receive record request"), - 1 | 2 => RecordResponse { conflict: false }, - 3 | 4 => RecordResponse { conflict: 
true }, + 1 | 2 => RecordResponse { + conflict: false, + sp_version: 0, + }, + 3 | 4 => RecordResponse { + conflict: true, + sp_version: 0, + }, _ => unreachable!("there are only 5 nodes"), }; Ok(tonic::Response::new(resp)) diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs index 37251355f..6f51ea446 100644 --- a/crates/curp/src/client/unary/propose_impl.rs +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -2,6 +2,7 @@ use std::{pin::Pin, sync::Arc}; use curp_external_api::cmd::Command; use futures::{future, stream, FutureExt, Stream, StreamExt}; +use tonic::Response; use crate::{ client::{connect::ProposeResponse, retry::Context}, @@ -22,6 +23,8 @@ enum ProposeEvent { conflict_l: bool, /// Speculative execution result er: Result, + /// Speculative pool version + sp_version_l: u64, }, /// After sync result AfterSync { @@ -30,8 +33,8 @@ enum ProposeEvent { }, /// Record result Record { - /// conflict returned by the follower - conflict: bool, + /// Speculative pool version + sp_version: Option, }, } @@ -55,9 +58,24 @@ impl Unary { | (ProposeEvent::AfterSync { asr }, ProposeEvent::SpecExec { er, .. }) => { Ok(Self::combine_er_asr(er, asr)) } - (ProposeEvent::SpecExec { conflict_l, er }, ProposeEvent::Record { conflict }) - | (ProposeEvent::Record { conflict }, ProposeEvent::SpecExec { conflict_l, er }) => { - let require_asr = !use_fast_path || conflict | conflict_l; + ( + ProposeEvent::SpecExec { + conflict_l, + er, + sp_version_l, + }, + ProposeEvent::Record { sp_version }, + ) + | ( + ProposeEvent::Record { sp_version }, + ProposeEvent::SpecExec { + conflict_l, + er, + sp_version_l, + }, + ) => { + let require_asr = + !use_fast_path || conflict_l || sp_version.map_or(true, |v| v != sp_version_l); Self::with_spec_exec(stream, er, require_asr).await } (ProposeEvent::AfterSync { asr }, ProposeEvent::Record { .. 
}) @@ -89,7 +107,7 @@ impl Unary { if use_fast_path { let event = Self::next_event(&mut stream_pinned).await?; match event { - ProposeEvent::SpecExec { conflict_l, er } => { + ProposeEvent::SpecExec { conflict_l, er, .. } => { Self::with_spec_exec(stream_pinned, er, conflict_l).await } ProposeEvent::AfterSync { asr } => Self::with_after_sync(stream_pinned, asr).await, @@ -179,17 +197,31 @@ impl Unary { let record_req = RecordRequest::new::(ctx.propose_id(), cmd); let record = move |conn: Arc| { let record_req_c = record_req.clone(); - async move { conn.record(record_req_c, timeout).await } + async move { + conn.record(record_req_c, timeout) + .await + .map(Response::into_inner) + } }; let stream = ctx .cluster_state() - .for_each_follower_with_quorum( + .for_each_follower_with_expect( record, - |res| res.is_ok_and(|resp| !resp.get_ref().conflict), - |qs, ids| QuorumSet::is_super_quorum(qs, ids), + |res| res.ok().filter(|r| !r.conflict).map(|r| r.sp_version), + |(ids, latest), (id, sp_version)| { + if sp_version > latest { + ids.clear(); + ids.push(id); + sp_version + } else { + latest + } + }, + 0, + |qs, ids| qs.is_super_quorum(ids), ) - .map(move |ok| ProposeEvent::Record { conflict: !ok }) + .map(move |ok| ProposeEvent::Record { sp_version: ok }) .map(Ok) .into_stream(); @@ -202,7 +234,7 @@ impl Unary { #[allow(clippy::type_complexity)] // copied from the return value of `ConnectApi::propose_stream` fn flatten_propose_stream_result( result: Result< - tonic::Response> + Send>>, + Response> + Send>>, CurpError, >, ) -> EventStream<'static, C> { @@ -274,6 +306,7 @@ impl From for ProposeEvent { match resp.op.expect("op should always exist") { ResponseOp::Propose(resp) => Self::SpecExec { conflict_l: resp.conflict, + sp_version_l: resp.sp_version, er: resp .map_result::(Result::transpose) .ok() diff --git a/crates/curp/src/log_entry/entry_data.rs b/crates/curp/src/log_entry/entry_data.rs index c255d09cd..48c3c2531 100644 --- a/crates/curp/src/log_entry/entry_data.rs 
+++ b/crates/curp/src/log_entry/entry_data.rs @@ -4,6 +4,7 @@ use serde::Deserialize; use serde::Serialize; use crate::member::Membership; +use crate::server::conflict::spec_pool_new::SpecPoolRepl; #[allow(variant_size_differences)] // The `Membership` won't be too large /// Entry data of a `LogEntry` @@ -18,6 +19,8 @@ pub(crate) enum EntryData { Shutdown, /// `Member` entry Member(Membership), + /// Speculative pool replication entry + SpecPoolReplication(SpecPoolRepl), } impl From> for EntryData { @@ -31,3 +34,9 @@ impl From for EntryData { EntryData::Member(value) } } + +impl From for EntryData { + fn from(value: SpecPoolRepl) -> Self { + EntryData::SpecPoolReplication(value) + } +} diff --git a/crates/curp/src/response.rs b/crates/curp/src/response.rs index 9305e3c58..a2941379d 100644 --- a/crates/curp/src/response.rs +++ b/crates/curp/src/response.rs @@ -55,7 +55,8 @@ impl ResponseSender { /// Sends the error result pub(super) fn send_err(&self, err: C::Error) { - let er = ProposeResponse::new_result::(&Err(err.clone()), false); + // An error does not need `sp_version` + let er = ProposeResponse::new_result::(&Err(err.clone()), false, 0); let asr = SyncedResponse::new_result::(&Err(err)); self.send_propose(er); self.send_synced(asr); diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 8d24d2e98..05fe04219 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -170,7 +170,11 @@ impl ProposeRequest { impl ProposeResponse { /// Create an ok propose response - pub(crate) fn new_result(result: &Result, conflict: bool) -> Self { + pub(crate) fn new_result( + result: &Result, + conflict: bool, + sp_version: u64, + ) -> Self { let result = match *result { Ok(ref er) => Some(CmdResult { result: Some(CmdResultInner::Ok(er.encode())), @@ -179,7 +183,11 @@ impl ProposeResponse { result: Some(CmdResultInner::Error(e.encode())), }), }; - Self { result, conflict } + Self { + result, + conflict, + sp_version, + } } /// Create an 
empty propose response @@ -188,6 +196,7 @@ impl ProposeResponse { Self { result: None, conflict: false, + sp_version: 0, } } diff --git a/crates/curp/src/server/cmd_worker/mod.rs b/crates/curp/src/server/cmd_worker/mod.rs index b7b89d993..f12697bb1 100644 --- a/crates/curp/src/server/cmd_worker/mod.rs +++ b/crates/curp/src/server/cmd_worker/mod.rs @@ -97,7 +97,7 @@ fn after_sync_cmds, RC: RoleChange>( .collect(); let results = ce.after_sync(cmds, Some(highest_index)); - send_results(results.into_iter(), resp_txs); + send_as_results(results.into_iter(), resp_txs); for (entry, _) in cmd_entries { curp.trigger(&entry.propose_id); @@ -107,7 +107,7 @@ fn after_sync_cmds, RC: RoleChange>( } /// Send cmd results to clients -fn send_results<'a, C, R, S>(results: R, txs: S) +fn send_as_results<'a, C, R, S>(results: R, txs: S) where C: Command, R: Iterator, C::Error>>, @@ -118,7 +118,9 @@ where Ok(r) => { let (asr, er_opt) = r.into_parts(); let _ignore_er = tx_opt.as_ref().zip(er_opt.as_ref()).map(|(tx, er)| { - tx.send_propose(ProposeResponse::new_result::(&Ok(er.clone()), true)); + // In after sync result, `sp_version` could be safely ignored (set to 0) as the + // command has successfully replicated to the majority of nodes + tx.send_propose(ProposeResponse::new_result::(&Ok(er.clone()), true, 0)); }); let _ignore_asr = tx_opt .as_ref() @@ -155,6 +157,11 @@ fn after_sync_others, RC: RoleChange>( // The no-op command has been applied to state machine (EntryData::Empty, _) => curp.set_no_op_applied(), (EntryData::Member(_), _) => {} + (EntryData::SpecPoolReplication(r), _) => { + let mut sp_l = curp.spec_pool().lock(); + sp_l.update_version(r.version()); + sp_l.gc(r.ids()); + } _ => unreachable!(), } diff --git a/crates/curp/src/server/conflict/spec_pool_new.rs b/crates/curp/src/server/conflict/spec_pool_new.rs index 7e7823da1..ab779663c 100644 --- a/crates/curp/src/server/conflict/spec_pool_new.rs +++ b/crates/curp/src/server/conflict/spec_pool_new.rs @@ -1,6 +1,7 @@ use 
std::collections::{HashMap, HashSet}; use curp_external_api::conflict::SpeculativePoolOp; +use serde::{Deserialize, Serialize}; use crate::rpc::{PoolEntry, ProposeId}; @@ -13,14 +14,17 @@ pub(crate) struct SpeculativePool { command_sps: Vec>, /// propose id to entry mapping entries: HashMap>, + /// Current version + version: u64, } impl SpeculativePool { /// Creates a new pool - pub(crate) fn new(command_sps: Vec>) -> Self { + pub(crate) fn new(command_sps: Vec>, version: u64) -> Self { Self { command_sps, entries: HashMap::new(), + version, } } @@ -93,4 +97,41 @@ impl SpeculativePool { self.remove_by_id(&id); } } + + /// Returns the current version + pub(crate) fn version(&self) -> u64 { + self.version + } + + /// Updates the current version + pub(crate) fn update_version(&mut self, version: u64) { + debug_assert!(version > self.version, "invalid version: {version}"); + self.version = version; + } +} + +/// A Speculative Pool log entry +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub(crate) struct SpecPoolRepl { + /// The version of this entry + version: u64, + /// Propose ids of the leader's speculative pool entries + ids: HashSet<ProposeId>, +} + +impl SpecPoolRepl { + /// Creates a new `SpecPoolRepl` + pub(crate) fn new(version: u64, ids: HashSet<ProposeId>) -> Self { + Self { version, ids } + } + + /// Returns the version of this entry + pub(crate) fn version(&self) -> u64 { + self.version + } + + /// Returns the propose ids + pub(crate) fn ids(&self) -> &HashSet<ProposeId> { + &self.ids + } } diff --git a/crates/curp/src/server/conflict/tests.rs b/crates/curp/src/server/conflict/tests.rs index bc9f1d6d1..10a18cc64 100644 --- a/crates/curp/src/server/conflict/tests.rs +++ b/crates/curp/src/server/conflict/tests.rs @@ -105,7 +105,7 @@ impl UncommittedPoolOp for TestUcp { #[test] fn conflict_should_be_detected_in_sp() { - let mut sp = SpeculativePool::new(vec![Box::new(TestSp::default())]); + let mut sp = SpeculativePool::new(vec![Box::new(TestSp::default())], 0); let
entry1 = PoolEntry::new(ProposeId::default(), Arc::new(0)); let entry2 = PoolEntry::new(ProposeId::default(), Arc::new(1)); assert!(sp.insert(entry1.clone()).is_none()); @@ -117,7 +117,7 @@ fn conflict_should_be_detected_in_sp() { #[test] fn sp_should_returns_all_entries() { - let mut sp = SpeculativePool::new(vec![Box::new(TestSp::default())]); + let mut sp = SpeculativePool::new(vec![Box::new(TestSp::default())], 0); let entries: Vec<_> = (0..10) .map(|i| PoolEntry::new(ProposeId::default(), Arc::new(i))) .collect(); diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index e24910c14..1daa283e6 100644 --- a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -117,7 +117,7 @@ where } /// Entry to execute -type ExecutorEntry = (Arc>, Arc); +type ExecutorEntry = ((Arc>, Arc), u64); /// `CurpNode` represents a single node of curp cluster pub(super) struct CurpNode, RC: RoleChange> { @@ -172,9 +172,12 @@ impl, RC: RoleChange> CurpNode { } let id = req.propose_id(); let cmd: Arc = Arc::new(req.cmd()?); - let conflict = self.curp.follower_record(id, &cmd); + let (conflict, sp_version) = self.curp.follower_record(id, &cmd); - Ok(RecordResponse { conflict }) + Ok(RecordResponse { + conflict, + sp_version, + }) } /// Handle `Record` requests @@ -234,7 +237,8 @@ impl, RC: RoleChange> CurpNode { let cmd_executor_c = cmd_executor.clone(); let _ignore = tokio::spawn(async move { tokio::join!(wait_conflict, wait_no_op); - cmd_executor_c((entry, resp_tx)); + // read only commands does not need `sp_version` + cmd_executor_c(((entry, resp_tx), 0)); }); } } @@ -253,7 +257,7 @@ impl, RC: RoleChange> CurpNode { let pool_entries = proposes .iter() .map(|p| PoolEntry::new(p.id, Arc::clone(&p.cmd))); - let conflicts = curp.leader_record(pool_entries); + let (conflicts, sp_version) = curp.leader_record(pool_entries); for (p, conflict) in proposes.iter().zip(conflicts) { info!("handle mutative cmd: {:?}, 
conflict: {conflict}", p.cmd); p.resp_tx.set_conflict(conflict); @@ -268,21 +272,30 @@ impl, RC: RoleChange> CurpNode { .into_iter() .zip(resp_txs) .filter(|(_, tx)| !tx.is_conflict()) + .zip(std::iter::repeat(sp_version)) .for_each(cmd_executor); } /// Speculatively execute a command fn build_executor(ce: Arc, curp: Arc>) -> impl Fn(ExecutorEntry) + Clone { - move |(entry, resp_tx): (_, Arc)| { + move |((entry, resp_tx), sp_version): ExecutorEntry| { info!("spec execute entry: {entry:?}"); let result = execute(&entry, ce.as_ref(), curp.as_ref()); match result { Ok((er, Some(asr))) => { - resp_tx.send_propose(ProposeResponse::new_result::(&Ok(er), false)); + resp_tx.send_propose(ProposeResponse::new_result::( + &Ok(er), + false, + sp_version, + )); resp_tx.send_synced(SyncedResponse::new_result::(&Ok(asr))); } Ok((er, None)) => { - resp_tx.send_propose(ProposeResponse::new_result::(&Ok(er), false)); + resp_tx.send_propose(ProposeResponse::new_result::( + &Ok(er), + false, + sp_version, + )); } Err(e) => resp_tx.send_err::(e), } @@ -715,10 +728,11 @@ impl, RC: RoleChange> CurpNode { .map_err(|e| CurpError::internal(format!("get applied index error, {e}")))?; let (as_tx, as_rx) = flume::unbounded(); let (propose_tx, propose_rx) = flume::bounded(4096); - let sp = Arc::new(Mutex::new(SpeculativePool::new(sps))); - let ucp = Arc::new(Mutex::new(UncommittedPool::new(ucps))); // create curp state machine - let (voted_for, entries) = storage.recover()?; + let (voted_for, entries, sp_version) = storage.recover()?; + let sp = Arc::new(Mutex::new(SpeculativePool::new(sps, sp_version))); + let ucp = Arc::new(Mutex::new(UncommittedPool::new(ucps))); + let curp = Arc::new( RawCurp::builder() .is_leader(is_leader) diff --git a/crates/curp/src/server/curp_node/replication.rs b/crates/curp/src/server/curp_node/replication.rs index 62947a780..aee27b84e 100644 --- a/crates/curp/src/server/curp_node/replication.rs +++ b/crates/curp/src/server/curp_node/replication.rs @@ -12,10 +12,7 
@@ use tracing::{debug, error, info, warn}; use utils::config::CurpConfig; use crate::{ - rpc::{ - connect::InnerConnectApiWrapper, AppendEntriesResponse, InstallSnapshotResponse, - SyncSpecPoolRequest, - }, + rpc::{connect::InnerConnectApiWrapper, AppendEntriesResponse, InstallSnapshotResponse}, server::{ metrics, raw_curp::{ @@ -78,22 +75,13 @@ impl, RC: RoleChange> CurpNode { cfg, )) }); - let spec_pool_sync_handles = node_states.into_iter().map(|(id, state)| { - let cfg = cfg.clone(); - info!("spawning sync spec pool task for {id}"); - tokio::spawn(Self::spec_pool_sync_worker( - id, - state, - action_tx.clone(), - self_id, - self_term, - cfg, - )) - }); + let spec_pool_sync_handle = + tokio::spawn(Self::spec_pool_sync_worker(action_tx.clone(), cfg.clone())); + *HANDLES.lock() = replication_handles .chain([state_handle]) .chain([heartbeat_handle]) - .chain(spec_pool_sync_handles) + .chain([spec_pool_sync_handle]) .collect(); } @@ -235,7 +223,7 @@ impl, RC: RoleChange> CurpNode { Action::GetLogFrom(_) | Action::StepDown(_) | Action::GetCommitIndex(_) - | Action::GetSpecPoolEntryIds(_) => {} + | Action::ReplicateSpecPoolSync => {} } let __ignore = action_tx.send(action); } @@ -324,60 +312,17 @@ impl, RC: RoleChange> CurpNode { } /// A worker responsible for sync speculative pool to followers in the cluster - async fn spec_pool_sync_worker( - node_id: u64, - node_state: NodeState, - action_tx: flume::Sender>, - self_id: u64, - self_term: u64, - cfg: CurpConfig, - ) { - let rpc_timeout = cfg.rpc_timeout; + async fn spec_pool_sync_worker(action_tx: flume::Sender>, cfg: CurpConfig) { let sync_interval = cfg.spec_pool_sync_interval; - let connect = node_state.connect(); loop { tokio::time::sleep(sync_interval).await; - let (tx, rx) = oneshot::channel(); - if action_tx.send(Action::GetSpecPoolEntryIds(tx)).is_err() { + if action_tx.send(Action::ReplicateSpecPoolSync).is_err() { debug!( "action_rx closed because the leader stepped down, exiting spec pool sync worker" ); 
break; } - let entries = match rx.await { - Ok(x) => x, - Err(err) => { - error!("channel unexpectedly closed: {err}"); - return; - } - }; - let ids = bincode::serialize(&entries) - .unwrap_or_else(|err| unreachable!("serialize failed: {err}")); - Self::send_sync_spec_pool(connect, rpc_timeout, node_id, self_id, self_term, ids).await; } } - - /// Send `sync_spec_pool` request - async fn send_sync_spec_pool( - connect: &InnerConnectApiWrapper, - timeout: Duration, - node_id: u64, - self_id: u64, - self_term: u64, - ids_serialized: Vec, - ) { - debug!("{self_id} send append_entries to {node_id}"); - - let _ignore = connect - .sync_spec_pool( - SyncSpecPoolRequest { - term: self_term, - ids: ids_serialized, - }, - timeout, - ) - .await - .map_err(|err| warn!("sync spec pool to {node_id} failed, {err:?}")); - } } diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index b1ee3f5c0..f6e3d8da6 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -550,23 +550,27 @@ impl RawCurp { } /// Handles record - pub(super) fn follower_record(&self, propose_id: ProposeId, cmd: &Arc) -> bool { - let conflict = self - .ctx - .spec_pool - .lock() - .insert(PoolEntry::new(propose_id, Arc::clone(cmd))) - .is_some(); + pub(super) fn follower_record(&self, propose_id: ProposeId, cmd: &Arc) -> (bool, u64) { + let (conflict, version) = self.ctx.spec_pool.map_lock(|mut sp| { + ( + sp.insert(PoolEntry::new(propose_id, Arc::clone(cmd))) + .is_some(), + sp.version(), + ) + }); if conflict { metrics::get() .proposals_failed .add(1, &[KeyValue::new("reason", "follower key conflict")]); } - conflict + (conflict, version) } /// Handles record - pub(super) fn leader_record(&self, entries: impl Iterator>) -> Vec { + pub(super) fn leader_record( + &self, + entries: impl Iterator>, + ) -> (Vec, u64) { let mut sp_l = self.ctx.spec_pool.lock(); let mut ucp_l = self.ctx.uncommitted_pool.lock(); let mut conflicts = 
Vec::new(); @@ -579,7 +583,7 @@ impl RawCurp { conflicts.iter().filter(|c| **c).count().numeric_cast(), &[KeyValue::new("reason", "leader key conflict")], ); - conflicts + (conflicts, sp_l.version()) } /// Push one log, called by the leader @@ -1614,7 +1618,10 @@ impl RawCurp { EntryData::Command(ref cmd) => { let _ignore = ucp_l.insert(&PoolEntry::new(propose_id, Arc::clone(cmd))); } - EntryData::Shutdown | EntryData::Empty | EntryData::Member(_) => {} + EntryData::Shutdown + | EntryData::Empty + | EntryData::Member(_) + | EntryData::SpecPoolReplication(_) => {} } } } diff --git a/crates/curp/src/server/raw_curp/replication.rs b/crates/curp/src/server/raw_curp/replication.rs index 57e36cd45..62ed2b8d6 100644 --- a/crates/curp/src/server/raw_curp/replication.rs +++ b/crates/curp/src/server/raw_curp/replication.rs @@ -1,10 +1,8 @@ -use std::collections::HashSet; - use curp_external_api::{cmd::Command, role_change::RoleChange, LogIndex}; use tokio::sync::oneshot; use tracing::{debug, error, info}; -use crate::rpc::ProposeId; +use crate::{rpc::ProposeId, server::conflict::spec_pool_new::SpecPoolRepl}; use super::{AppendEntries, RawCurp, SyncAction}; @@ -30,9 +28,8 @@ pub(crate) enum Action { /// Contains the latest term. StepDown(u64), - /// Request to get all speculative pool entries - /// Contains a sender to send the entries. 
- GetSpecPoolEntryIds(oneshot::Sender>), + /// Request to replicate speculative pool entries + ReplicateSpecPoolSync, } impl RawCurp { @@ -68,13 +65,13 @@ impl RawCurp { info!("received greater term: {node_term}, stepping down."); self.step_down(node_term); } - Action::GetSpecPoolEntryIds(tx) => { - if tx - .send(self.spec_pool().lock().all_ids().copied().collect()) - .is_err() - { - error!("send spec pool entries failed"); - } + Action::ReplicateSpecPoolSync => { + let sp_l = self.ctx.spec_pool.lock(); + let ids = sp_l.all_ids().copied().collect(); + let next_version = sp_l.version().wrapping_add(1); + let entry = SpecPoolRepl::new(next_version, ids); + let propose_id = ProposeId(rand::random(), 0); + let _ignore = self.push_log_entry(propose_id, entry); } } } diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index 41982909a..d2ada3879 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -46,9 +46,10 @@ impl RawCurp { let curp_storage = Arc::new(DB::open(&curp_config.engine_cfg).unwrap()); let _ignore = curp_storage.recover().unwrap(); - let sp = Arc::new(Mutex::new(SpeculativePool::new(vec![Box::new( - TestSpecPool::default(), - )]))); + let sp = Arc::new(Mutex::new(SpeculativePool::new( + vec![Box::new(TestSpecPool::default())], + 0, + ))); let ucp = Arc::new(Mutex::new(UncommittedPool::new(vec![Box::new( TestUncomPool::default(), )]))); diff --git a/crates/curp/src/server/storage/db.rs b/crates/curp/src/server/storage/db.rs index 267d3c084..b5c0ecd10 100644 --- a/crates/curp/src/server/storage/db.rs +++ b/crates/curp/src/server/storage/db.rs @@ -16,6 +16,8 @@ const VOTE_FOR: &[u8] = b"VoteFor"; const CF: &str = "curp"; /// Column family name for members const MEMBERS_CF: &str = "members"; +/// Speculative pool version +const SP_VER: &[u8] = b"SPVer"; /// The sub dir for `RocksDB` files const ROCKSDB_SUB_DIR: &str = "rocksdb"; @@ -70,7 +72,17 @@ impl StorageApi for 
DB { .get(CF, VOTE_FOR)? .map(|bytes| bincode::deserialize::<(u64, ServerId)>(&bytes)) .transpose()?; - Ok((voted_for, entries)) + let sp_version = self + .db + .get(CF, SP_VER)? + .map(|bytes| { + bytes + .try_into() + .unwrap_or_else(|_| unreachable!("should be exactly 8 bytes")) + }) + // default to 0 + .map_or(0, u64::from_le_bytes); + Ok((voted_for, entries, sp_version)) } #[inline] @@ -153,7 +165,7 @@ mod tests { let storage_cfg = EngineConfig::RocksDB(db_dir.clone()); { let s = DB::::open(&storage_cfg)?; - let (voted_for, entries) = s.recover()?; + let (voted_for, entries, _) = s.recover()?; assert!(voted_for.is_none()); assert!(entries.is_empty()); s.flush_voted_for(1, 222)?; @@ -169,7 +181,7 @@ mod tests { { let s = DB::::open(&storage_cfg)?; - let (voted_for, entries) = s.recover()?; + let (voted_for, entries, _) = s.recover()?; assert_eq!(voted_for, Some((3, 111))); assert_eq!(entries[0].index, 1); assert_eq!(entries[1].index, 2); diff --git a/crates/curp/src/server/storage/mod.rs b/crates/curp/src/server/storage/mod.rs index 9f022acca..f30e60b24 100644 --- a/crates/curp/src/server/storage/mod.rs +++ b/crates/curp/src/server/storage/mod.rs @@ -35,8 +35,10 @@ impl From for StorageError { /// Vote info pub(crate) type VoteInfo = (u64, ServerId); +/// Speculative pool version +pub(crate) type SpVersion = u64; /// Recovered data -pub(crate) type RecoverData = (Option, Vec>); +pub(crate) type RecoverData = (Option, Vec>, SpVersion); /// Curp storage api #[allow(clippy::module_name_repetitions)] From b3866e22a136f9eeb1aadad53bc19ce08cf917e4 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 12 Nov 2024 16:38:45 +0800 Subject: [PATCH 315/322] refactor: for each follower implementation Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/cluster_state.rs | 55 +++----------------- crates/curp/src/client/fetch.rs | 18 ++++--- crates/curp/src/client/unary/propose_impl.rs | 11 ++-- 3 files 
changed, 27 insertions(+), 57 deletions(-) diff --git a/crates/curp/src/client/cluster_state.rs b/crates/curp/src/client/cluster_state.rs index e51f46622..a9e6f731e 100644 --- a/crates/curp/src/client/cluster_state.rs +++ b/crates/curp/src/client/cluster_state.rs @@ -181,49 +181,7 @@ impl ClusterStateFull { } /// Execute an operation on each follower, until a quorum is reached. - /// - /// Parameters: - /// - f: Operation to execute on each follower's connection - /// - filter: Function to filter on each response - /// - quorum: Function to determine if a quorum is reached, use functions in `QuorumSet` trait - /// - /// Returns `true` if then given quorum is reached. - pub(crate) async fn for_each_follower_with_quorum, F, Q>( - self, - mut f: impl FnMut(Arc) -> Fut, - mut filter: F, - mut expect_quorum: Q, - ) -> bool - where - F: FnMut(R) -> bool, - Q: FnMut(&dyn QuorumSet>, Vec) -> bool, - { - let qs = self.membership.as_joint(); - let leader_id = self.leader_id(); - - #[allow(clippy::pattern_type_mismatch)] - let stream: FuturesUnordered<_> = self - .member_connects() - .filter(|(id, _)| *id != leader_id) - .map(|(id, conn)| f(Arc::clone(conn)).map(move |r| (id, r))) - .collect(); - - let mut filtered = - stream.filter_map(|(id, r)| futures::future::ready(filter(r).then_some(id))); - - let mut ids = vec![leader_id]; - while let Some(id) = filtered.next().await { - ids.push(id); - if expect_quorum(&qs, ids.clone()) { - return true; - } - } - - false - } - - /// Execute an operation on each follower, until a quorum is reached. 
- pub(crate) async fn for_each_follower_with_expect< + pub(crate) async fn for_each_follower_until< Fut: Future, R, B, @@ -235,8 +193,8 @@ impl ClusterStateFull { self, mut f: impl FnMut(Arc) -> Fut, mut filter: FilterMap, - mut folder: Folder, mut b: B, + mut folder: Folder, mut expect: Expect, ) -> Option where @@ -475,12 +433,15 @@ mod test { }; let ok = state - .for_each_follower_with_quorum( + .for_each_follower_until( record, - |res| res.is_ok_and(|resp| resp.get_ref().conflict), + |res| res.ok().filter(|resp| resp.get_ref().conflict), + (), + |(ids, ()), (id, _)| ids.push(id), |qs, ids| QuorumSet::is_quorum(qs, ids), ) - .await; + .await + .is_some(); assert!(ok); } diff --git a/crates/curp/src/client/fetch.rs b/crates/curp/src/client/fetch.rs index 9636b7f56..2fbf22892 100644 --- a/crates/curp/src/client/fetch.rs +++ b/crates/curp/src/client/fetch.rs @@ -119,14 +119,20 @@ impl Fetch { let timeout = self.timeout; let term = state.term(); let fetch_membership = move |c: Arc| async move { - c.fetch_membership(FetchMembershipRequest {}, timeout).await + c.fetch_membership(FetchMembershipRequest {}, timeout) + .await + .map(Response::into_inner) }; - state.for_each_follower_with_quorum( - fetch_membership, - move |r| r.is_ok_and(|ok| ok.get_ref().term == term), - |qs, ids| QuorumSet::is_quorum(qs, ids), - ) + state + .for_each_follower_until( + fetch_membership, + move |r| r.ok().filter(|ok| ok.term == term), + (), + |(ids, ()), (id, _)| ids.push(id), + |qs, ids| QuorumSet::is_quorum(qs, ids), + ) + .map(|x| x.is_some()) } /// Fetch cluster state from leader diff --git a/crates/curp/src/client/unary/propose_impl.rs b/crates/curp/src/client/unary/propose_impl.rs index 6f51ea446..6f456de55 100644 --- a/crates/curp/src/client/unary/propose_impl.rs +++ b/crates/curp/src/client/unary/propose_impl.rs @@ -181,12 +181,15 @@ impl Unary { move |conn: Arc| async move { conn.read_index(timeout).await }; ctx.cluster_state() - .for_each_follower_with_quorum( + 
.for_each_follower_until( read_index, - move |res| res.is_ok_and(|resp| resp.get_ref().term == term), + move |res| res.ok().filter(|resp| resp.get_ref().term == term), + (), + |(ids, ()), (id, _)| ids.push(id), |qs, ids| QuorumSet::is_quorum(qs, ids), ) .await + .is_some() } /// Send record requests to the cluster @@ -206,9 +209,10 @@ impl Unary { let stream = ctx .cluster_state() - .for_each_follower_with_expect( + .for_each_follower_until( record, |res| res.ok().filter(|r| !r.conflict).map(|r| r.sp_version), + 0, |(ids, latest), (id, sp_version)| { if sp_version > latest { ids.clear(); @@ -218,7 +222,6 @@ impl Unary { latest } }, - 0, |qs, ids| qs.is_super_quorum(ids), ) .map(move |ok| ProposeEvent::Record { sp_version: ok }) From f3df69542384babaeb06de8f00cb1b757f9e0355 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 12 Nov 2024 20:52:42 +0800 Subject: [PATCH 316/322] test: add sp version tests Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/client/tests.rs | 96 +++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/crates/curp/src/client/tests.rs b/crates/curp/src/client/tests.rs index 6cb792428..b6e6e2a68 100644 --- a/crates/curp/src/client/tests.rs +++ b/crates/curp/src/client/tests.rs @@ -74,6 +74,15 @@ fn build_propose_response(conflict: bool) -> OpResponse { OpResponse { op: Some(resp) } } +fn build_propose_response_with_sp_ver(conflict: bool, sp_version: u64) -> OpResponse { + let resp = ResponseOp::Propose(ProposeResponse::new_result::( + &Ok(TestCommandResult::default()), + conflict, + sp_version, + )); + OpResponse { op: Some(resp) } +} + fn build_synced_response() -> OpResponse { let resp = ResponseOp::Synced(SyncedResponse::new_result::(&Ok(1.into()))); OpResponse { op: Some(resp) } @@ -549,3 +558,90 @@ async fn test_read_index_fail() { .await; assert!(res.is_err()); } + +async fn assert_slow_path(connects: HashMap>) { + let unary = 
init_unary_client(None, None); + let cluster_state = ClusterStateFull::new(0, 1, connects, build_default_membership()); + let ctx = Context::new(ProposeId::default(), cluster_state); + let start_at = Instant::now(); + let res = unary + .propose(&TestCommand::new_put(vec![1], 1), None, true, ctx) + .await + .unwrap() + .unwrap(); + assert!( + start_at.elapsed() > Duration::from_millis(100), + "slow round takes at least 100ms" + ); + // indicate that we actually run out of fast round + assert_eq!( + res, + (TestCommandResult::default(), Some(LogIndexResult::from(1))) + ); +} + +#[traced_test] +#[tokio::test] +async fn test_unary_propose_sp_version_mismatch_fallback_case1() { + let connects = init_mocked_connects(5, |id, conn| { + conn.expect_propose_stream() + .return_once(move |_req, _token, _timeout| { + assert_eq!(id, 0, "followers should not receive propose"); + let resp = async_stream::stream! { + yield Ok(build_propose_response_with_sp_ver(false, 1)); + tokio::time::sleep(Duration::from_millis(100)).await; + yield Ok(build_synced_response()); + }; + Ok(tonic::Response::new(Box::new(resp))) + }); + conn.expect_record().return_once(move |_req, _timeout| { + let resp = match id { + 0 => unreachable!("leader should not receive record request"), + 1 | 2 => RecordResponse { + conflict: false, + sp_version: 1, + }, + // outdated + 3 | 4 => RecordResponse { + conflict: false, + sp_version: 0, + }, + _ => unreachable!("there are only 5 nodes"), + }; + Ok(tonic::Response::new(resp)) + }); + }); + + assert_slow_path(connects).await; +} + +#[traced_test] +#[tokio::test] +async fn test_unary_propose_sp_version_mismatch_fallback_case2() { + let connects = init_mocked_connects(5, |id, conn| { + conn.expect_propose_stream() + .return_once(move |_req, _token, _timeout| { + assert_eq!(id, 0, "followers should not receive propose"); + let resp = async_stream::stream! 
{ + yield Ok(build_propose_response_with_sp_ver(false, 1)); + tokio::time::sleep(Duration::from_millis(100)).await; + yield Ok(build_synced_response()); + }; + Ok(tonic::Response::new(Box::new(resp))) + }); + conn.expect_record().return_once(move |_req, _timeout| { + let resp = match id { + 0 => unreachable!("leader should not receive record request"), + // all outdated + 1 | 2 | 3 | 4 => RecordResponse { + conflict: false, + sp_version: 0, + }, + _ => unreachable!("there are only 5 nodes"), + }; + Ok(tonic::Response::new(resp)) + }); + }); + + assert_slow_path(connects).await; +} From 617ba1247b011e8aab25e96075cd1e037960f6e1 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 12 Nov 2024 21:06:28 +0800 Subject: [PATCH 317/322] chore: remove unused sync spec pool RPC Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/proto/inner_message.proto | 8 ------- crates/curp/src/rpc/connect/lazy.rs | 18 ++-------------- crates/curp/src/rpc/connect/mod.rs | 28 +------------------------ crates/curp/src/rpc/mod.rs | 6 +++--- crates/curp/src/server/curp_node/mod.rs | 23 ++------------------ crates/curp/src/server/mod.rs | 12 ----------- crates/curp/src/server/raw_curp/mod.rs | 6 ------ 7 files changed, 8 insertions(+), 93 deletions(-) diff --git a/crates/curp/proto/inner_message.proto b/crates/curp/proto/inner_message.proto index e8d249b0f..f8b89d680 100644 --- a/crates/curp/proto/inner_message.proto +++ b/crates/curp/proto/inner_message.proto @@ -54,13 +54,6 @@ message TryBecomeLeaderNowRequest {} message TryBecomeLeaderNowResponse {} -message SyncSpecPoolRequest { - uint64 term = 1; - bytes ids = 2; -} - -message SyncSpecPoolResponse {} - service InnerProtocol { rpc AppendEntries(AppendEntriesRequest) returns (AppendEntriesResponse); rpc Vote(VoteRequest) returns (VoteResponse); @@ -69,5 +62,4 @@ service InnerProtocol { rpc TriggerShutdown(TriggerShutdownRequest) returns (TriggerShutdownResponse); rpc 
TryBecomeLeaderNow(TryBecomeLeaderNowRequest) returns (TryBecomeLeaderNowResponse); - rpc SyncSpecPool(SyncSpecPoolRequest) returns (SyncSpecPoolResponse); } diff --git a/crates/curp/src/rpc/connect/lazy.rs b/crates/curp/src/rpc/connect/lazy.rs index da48248c7..e186cd392 100644 --- a/crates/curp/src/rpc/connect/lazy.rs +++ b/crates/curp/src/rpc/connect/lazy.rs @@ -17,8 +17,8 @@ use crate::{ AppendEntriesRequest, AppendEntriesResponse, ChangeMembershipRequest, CurpError, FetchMembershipRequest, InstallSnapshotResponse, MembershipResponse, MoveLeaderRequest, MoveLeaderResponse, OpResponse, ProposeRequest, ReadIndexResponse, RecordRequest, - RecordResponse, ShutdownRequest, ShutdownResponse, SyncSpecPoolRequest, - SyncSpecPoolResponse, VoteRequest, VoteResponse, WaitLearnerRequest, WaitLearnerResponse, + RecordResponse, ShutdownRequest, ShutdownResponse, VoteRequest, VoteResponse, + WaitLearnerRequest, WaitLearnerResponse, }, snapshot::Snapshot, }; @@ -131,20 +131,6 @@ impl InnerConnectApi for ConnectLazy> { self.connect_inner(&mut inner); inner.as_ref().unwrap().try_become_leader_now(timeout).await } - - async fn sync_spec_pool( - &self, - request: SyncSpecPoolRequest, - timeout: Duration, - ) -> Result, tonic::Status> { - let mut inner = self.inner.lock().await; - self.connect_inner(&mut inner); - inner - .as_ref() - .unwrap() - .sync_spec_pool(request, timeout) - .await - } } #[allow(clippy::unwrap_used)] diff --git a/crates/curp/src/rpc/connect/mod.rs b/crates/curp/src/rpc/connect/mod.rs index 781f86af5..37e15a442 100644 --- a/crates/curp/src/rpc/connect/mod.rs +++ b/crates/curp/src/rpc/connect/mod.rs @@ -53,8 +53,7 @@ use super::{ proto::commandpb::{ReadIndexRequest, ReadIndexResponse}, reconnect::Reconnect, ChangeMembershipRequest, FetchMembershipRequest, MembershipResponse, OpResponse, RecordRequest, - RecordResponse, SyncSpecPoolRequest, SyncSpecPoolResponse, WaitLearnerRequest, - WaitLearnerResponse, + RecordResponse, WaitLearnerRequest, WaitLearnerResponse, 
}; /// Install snapshot chunk size: 64KB @@ -274,13 +273,6 @@ pub(crate) trait InnerConnectApi: Send + Sync + 'static { /// Send `TryBecomeLeaderNowRequest` async fn try_become_leader_now(&self, timeout: Duration) -> Result<(), tonic::Status>; - - /// Send `SyncSpecPoolRequest` - async fn sync_spec_pool( - &self, - request: SyncSpecPoolRequest, - timeout: Duration, - ) -> Result, tonic::Status>; } /// Inner Connect Api Wrapper @@ -624,24 +616,6 @@ impl InnerConnectApi for Connect> { result.map(|_| ()) } - - async fn sync_spec_pool( - &self, - request: SyncSpecPoolRequest, - timeout: Duration, - ) -> Result, tonic::Status> { - #[cfg(feature = "client-metrics")] - let start_at = self.before_rpc::(); - - let mut client = self.rpc_connect.clone(); - let req = tonic::Request::new(request); - let result = with_timeout!(timeout, client.sync_spec_pool(req)); - - #[cfg(feature = "client-metrics")] - self.after_rpc(start_at, &result); - - result - } } /// A connect api implementation which bypass kernel to dispatch method diff --git a/crates/curp/src/rpc/mod.rs b/crates/curp/src/rpc/mod.rs index 05fe04219..fa1b128fb 100644 --- a/crates/curp/src/rpc/mod.rs +++ b/crates/curp/src/rpc/mod.rs @@ -11,9 +11,9 @@ pub(crate) use self::proto::{ commandpb::CurpError as CurpErrorWrapper, inner_messagepb::{ inner_protocol_server::InnerProtocol, AppendEntriesRequest, AppendEntriesResponse, - InstallSnapshotRequest, InstallSnapshotResponse, SyncSpecPoolRequest, SyncSpecPoolResponse, - TriggerShutdownRequest, TriggerShutdownResponse, TryBecomeLeaderNowRequest, - TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, + InstallSnapshotRequest, InstallSnapshotResponse, TriggerShutdownRequest, + TriggerShutdownResponse, TryBecomeLeaderNowRequest, TryBecomeLeaderNowResponse, + VoteRequest, VoteResponse, }, }; pub use self::proto::{ diff --git a/crates/curp/src/server/curp_node/mod.rs b/crates/curp/src/server/curp_node/mod.rs index 1daa283e6..a76536c6d 100644 --- 
a/crates/curp/src/server/curp_node/mod.rs +++ b/crates/curp/src/server/curp_node/mod.rs @@ -48,9 +48,8 @@ use crate::{ InstallSnapshotRequest, InstallSnapshotResponse, MembershipResponse, MoveLeaderRequest, MoveLeaderResponse, PoolEntry, ProposeId, ProposeRequest, ProposeResponse, ReadIndexResponse, RecordRequest, RecordResponse, ShutdownRequest, ShutdownResponse, - SyncSpecPoolRequest, SyncSpecPoolResponse, SyncedResponse, TriggerShutdownRequest, - TriggerShutdownResponse, TryBecomeLeaderNowRequest, TryBecomeLeaderNowResponse, - VoteRequest, VoteResponse, + SyncedResponse, TriggerShutdownRequest, TriggerShutdownResponse, TryBecomeLeaderNowRequest, + TryBecomeLeaderNowResponse, VoteRequest, VoteResponse, }, server::{ cmd_worker::{after_sync, worker_reset, worker_snapshot}, @@ -596,24 +595,6 @@ impl, RC: RoleChange> CurpNode { } Ok(TryBecomeLeaderNowResponse::default()) } - - /// Handle `SyncSpecPool` request - pub(super) fn sync_spec_pool( - &self, - req: &SyncSpecPoolRequest, - ) -> Result { - let current_term = self.curp.term(); - if req.term < current_term { - return Err(CurpError::internal(format!( - "invalid leader term, current: {current_term}" - ))); - } - let entries: Vec = bincode::deserialize(&req.ids) - .unwrap_or_else(|_| unreachable!("failed to deserialize spec pool data")); - self.curp.gc_spec_pool(&entries.into_iter().collect()); - - Ok(SyncSpecPoolResponse::default()) - } } /// Spawned tasks diff --git a/crates/curp/src/server/mod.rs b/crates/curp/src/server/mod.rs index 59759da27..3e5b2acab 100644 --- a/crates/curp/src/server/mod.rs +++ b/crates/curp/src/server/mod.rs @@ -40,8 +40,6 @@ use crate::rpc::RecordRequest; use crate::rpc::RecordResponse; use crate::rpc::ShutdownRequest; use crate::rpc::ShutdownResponse; -use crate::rpc::SyncSpecPoolRequest; -use crate::rpc::SyncSpecPoolResponse; use crate::rpc::TriggerShutdownRequest; use crate::rpc::TriggerShutdownResponse; use crate::rpc::TryBecomeLeaderNowRequest; @@ -255,16 +253,6 @@ impl, RC: 
RoleChange> crate::rpc::InnerProtoc self.inner.try_become_leader_now(request.get_ref()).await?, )) } - - #[instrument(skip_all, name = "curp_sync_spec_pool")] - async fn sync_spec_pool( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - Ok(tonic::Response::new( - self.inner.sync_spec_pool(request.get_ref())?, - )) - } } /// Used for bypassed connect because the `Protocol` trait requires `tonic::Streaming` diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index f6e3d8da6..327ed95e5 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -15,7 +15,6 @@ use std::cmp::min; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; -use std::collections::HashSet; use std::fmt::Debug; use std::iter; use std::sync::atomic::AtomicU8; @@ -1353,11 +1352,6 @@ impl RawCurp { self.ctx.node_states.all_states() } - /// Performs garbage collection on the spec pool with given entries from the leader - pub(super) fn gc_spec_pool(&self, leader_entry_ids: &HashSet) { - self.ctx.spec_pool.lock().gc(leader_entry_ids); - } - #[cfg(test)] /// Get a range of log entry pub(crate) fn get_log_from(&self, idx: u64) -> Vec>> { From 8c797d651f9cce52e6422884a42a0734a809ab4f Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 12 Nov 2024 21:09:38 +0800 Subject: [PATCH 318/322] chore: merge gc and update_version Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/cmd_worker/mod.rs | 3 +-- crates/curp/src/server/conflict/spec_pool_new.rs | 10 +++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/crates/curp/src/server/cmd_worker/mod.rs b/crates/curp/src/server/cmd_worker/mod.rs index f12697bb1..778660a7b 100644 --- a/crates/curp/src/server/cmd_worker/mod.rs +++ b/crates/curp/src/server/cmd_worker/mod.rs @@ -159,8 +159,7 @@ fn after_sync_others, RC: RoleChange>( 
(EntryData::Member(_), _) => {} (EntryData::SpecPoolReplication(r), _) => { let mut sp_l = curp.spec_pool().lock(); - sp_l.update_version(r.version()); - sp_l.gc(r.ids()); + sp_l.gc(r.ids(), r.version()); } _ => unreachable!(), diff --git a/crates/curp/src/server/conflict/spec_pool_new.rs b/crates/curp/src/server/conflict/spec_pool_new.rs index ab779663c..9900717b1 100644 --- a/crates/curp/src/server/conflict/spec_pool_new.rs +++ b/crates/curp/src/server/conflict/spec_pool_new.rs @@ -86,7 +86,9 @@ impl SpeculativePool { /// Performs garbage collection on the spec pool with given entries from the leader /// /// Removes entries from the pool that are not present in the provided `leader_entries` - pub(crate) fn gc(&mut self, leader_entry_ids: &HashSet) { + pub(crate) fn gc(&mut self, leader_entry_ids: &HashSet, version: u64) { + debug_assert!(version > self.version, "invalid version: {version}"); + self.version = version; let to_remove: Vec<_> = self .entries .keys() @@ -102,12 +104,6 @@ impl SpeculativePool { pub(crate) fn version(&self) -> u64 { self.version } - - /// Updates the current version - pub(crate) fn update_version(&mut self, version: u64) { - debug_assert!(version > self.version, "invalid version: {version}"); - self.version = version; - } } /// A Speculative Pool log entry From 1d2340c254918172ada33850c753853b5299cfed Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 12 Nov 2024 21:19:07 +0800 Subject: [PATCH 319/322] refactor: persistent sp version Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/cmd_worker/mod.rs | 5 +++-- crates/curp/src/server/raw_curp/mod.rs | 15 +++++++++++++++ crates/curp/src/server/storage/db.rs | 7 +++++++ crates/curp/src/server/storage/mod.rs | 6 ++++++ 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/crates/curp/src/server/cmd_worker/mod.rs b/crates/curp/src/server/cmd_worker/mod.rs index 778660a7b..9a6356996 100644 --- 
a/crates/curp/src/server/cmd_worker/mod.rs +++ b/crates/curp/src/server/cmd_worker/mod.rs @@ -158,8 +158,9 @@ fn after_sync_others, RC: RoleChange>( (EntryData::Empty, _) => curp.set_no_op_applied(), (EntryData::Member(_), _) => {} (EntryData::SpecPoolReplication(r), _) => { - let mut sp_l = curp.spec_pool().lock(); - sp_l.gc(r.ids(), r.version()); + if let Err(err) = curp.gc_spec_pool(r.ids(), r.version()) { + error!("failed to gc spec pool: {err:?}"); + } } _ => unreachable!(), diff --git a/crates/curp/src/server/raw_curp/mod.rs b/crates/curp/src/server/raw_curp/mod.rs index 327ed95e5..a7191151b 100644 --- a/crates/curp/src/server/raw_curp/mod.rs +++ b/crates/curp/src/server/raw_curp/mod.rs @@ -15,6 +15,7 @@ use std::cmp::min; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; +use std::collections::HashSet; use std::fmt::Debug; use std::iter; use std::sync::atomic::AtomicU8; @@ -1690,4 +1691,18 @@ impl RawCurp { let _ignore = self.ctx.monitoring.write().remove(&id); } } + + /// Garbage collect the spec pool + pub(crate) fn gc_spec_pool( + &self, + ids: &HashSet, + version: u64, + ) -> Result<(), CurpError> { + let mut sp_l = self.ctx.spec_pool.lock(); + sp_l.gc(ids, version); + self.ctx + .curp_storage + .put_sp_version(version) + .map_err(Into::into) + } } diff --git a/crates/curp/src/server/storage/db.rs b/crates/curp/src/server/storage/db.rs index b5c0ecd10..3c27b81b2 100644 --- a/crates/curp/src/server/storage/db.rs +++ b/crates/curp/src/server/storage/db.rs @@ -104,6 +104,13 @@ impl StorageApi for DB { .transpose() .map_err(Into::into) } + + #[inline] + fn put_sp_version(&self, version: u64) -> Result<(), StorageError> { + let data = version.to_le_bytes(); + let op = WriteOperation::new_put(CF, SP_VER.to_vec(), data.to_vec()); + self.db.write_multi(vec![op], true).map_err(Into::into) + } } impl DB { diff --git a/crates/curp/src/server/storage/mod.rs b/crates/curp/src/server/storage/mod.rs index 
f30e60b24..f9c2b8072 100644 --- a/crates/curp/src/server/storage/mod.rs +++ b/crates/curp/src/server/storage/mod.rs @@ -80,6 +80,12 @@ pub trait StorageApi: Send + Sync { /// # Errors /// Return `StorageError` when it failed to recover the membership from underlying database. fn recover_membership(&self) -> Result, StorageError>; + + /// Put speculative pool version into the storage + /// + /// # Errors + /// Return `StorageError` when it failed to put to the underlying database + fn put_sp_version(&self, version: u64) -> Result<(), StorageError>; } /// CURP `DB` storage implementation From 5943fc155374b32ee8524a44fd57b06f4d6bea93 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 12 Nov 2024 21:21:36 +0800 Subject: [PATCH 320/322] test: add sp gc tests for curp server Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/raw_curp/tests.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/crates/curp/src/server/raw_curp/tests.rs b/crates/curp/src/server/raw_curp/tests.rs index d2ada3879..d1be3d869 100644 --- a/crates/curp/src/server/raw_curp/tests.rs +++ b/crates/curp/src/server/raw_curp/tests.rs @@ -829,3 +829,15 @@ fn leader_will_reset_transferee_after_it_become_follower() { curp.update_to_term_and_become_follower(&mut *curp.st.write(), 2); assert!(curp.get_transferee().is_none()); } + +#[traced_test] +#[test] +fn gc_spec_pool_should_update_version_and_persistent() { + let task_manager = Arc::new(TaskManager::new()); + let curp = { Arc::new(RawCurp::new_test(5, mock_role_change(), task_manager)) }; + assert_eq!(curp.ctx.spec_pool.lock().version(), 0); + curp.gc_spec_pool(&HashSet::new(), 2).unwrap(); + assert_eq!(curp.ctx.spec_pool.lock().version(), 2); + let (_, _, version) = curp.ctx.curp_storage.recover().unwrap(); + assert_eq!(version, 2); +} From 473ca27af8b904f0b100f1210807e743b453fac2 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: 
Tue, 12 Nov 2024 21:28:07 +0800 Subject: [PATCH 321/322] fix: allow gc with the same version(by ignoring) Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/src/server/conflict/spec_pool_new.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/curp/src/server/conflict/spec_pool_new.rs b/crates/curp/src/server/conflict/spec_pool_new.rs index 9900717b1..82e815aac 100644 --- a/crates/curp/src/server/conflict/spec_pool_new.rs +++ b/crates/curp/src/server/conflict/spec_pool_new.rs @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet}; use curp_external_api::conflict::SpeculativePoolOp; use serde::{Deserialize, Serialize}; +use tracing::warn; use crate::rpc::{PoolEntry, ProposeId}; @@ -87,7 +88,11 @@ impl SpeculativePool { /// /// Removes entries from the pool that are not present in the provided `leader_entries` pub(crate) fn gc(&mut self, leader_entry_ids: &HashSet, version: u64) { - debug_assert!(version > self.version, "invalid version: {version}"); + debug_assert!(version >= self.version, "invalid version: {version}"); + if version == self.version { + warn!("gc receives current version, the cluster might gc too frequently, ignoring"); + return; + } self.version = version; let to_remove: Vec<_> = self .entries From 9ea6b0bc2a24cc3c991e1fb3a01b60b83f5edd06 Mon Sep 17 00:00:00 2001 From: bsbds <69835502+bsbds@users.noreply.github.com> Date: Tue, 12 Nov 2024 21:37:26 +0800 Subject: [PATCH 322/322] test: add sp gc tests for curp server Signed-off-by: bsbds <69835502+bsbds@users.noreply.github.com> --- crates/curp/tests/it/server.rs | 68 ++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/crates/curp/tests/it/server.rs b/crates/curp/tests/it/server.rs index 47833362d..ab97cd074 100644 --- a/crates/curp/tests/it/server.rs +++ b/crates/curp/tests/it/server.rs @@ -886,6 +886,22 @@ async fn record_to_node( .conflict } +async fn record_to_node_get_version( + connect: &mut ProtocolClient, + 
propose_id: ProposeId, + command: Vec<u8>, +) -> u64 { + connect + .record(tonic::Request::new(RecordRequest { + propose_id: Some(propose_id), + command, + })) + .await + .unwrap() + .into_inner() + .sp_version +} + #[tokio::test(flavor = "multi_thread")] async fn curp_server_spec_pool_gc_ok() { init_logger(); @@ -1000,3 +1016,55 @@ async fn curp_server_spec_pool_gc_should_not_remove_leader_entry() { .await; assert!(conflict); } + +#[tokio::test(flavor = "multi_thread")] +async fn curp_server_spec_pool_gc_should_update_version() { + init_logger(); + // sets the initial sync interval to a relatively long duration + let group = CurpGroup::new_with_custom_sp_sync_interval(5, Duration::from_secs(1)).await; + let client = group.new_client().await; + + let leader = client.fetch_leader_id(true).await.unwrap(); + let follower_id = group.nodes.keys().find(|&id| &leader != id).unwrap(); + let mut follower_connect = group.get_connect(follower_id).await; + let cmd0 = bincode::serialize(&TestCommand::new_put(vec![0], 0)).unwrap(); + let mut ticker = tokio::time::interval(Duration::from_millis(1100)); + ticker.tick().await; + + let version0 = record_to_node_get_version( + &mut follower_connect, + ProposeId { + client_id: 0, + seq_num: 0, + }, + cmd0.clone(), + ) + .await; + assert_eq!(version0, 0); + + ticker.tick().await; + + let version1 = record_to_node_get_version( + &mut follower_connect, + ProposeId { + client_id: 1, + seq_num: 0, + }, + cmd0.clone(), + ) + .await; + assert_eq!(version1, 1); + + ticker.tick().await; + + let version2 = record_to_node_get_version( + &mut follower_connect, + ProposeId { + client_id: 2, + seq_num: 0, + }, + cmd0.clone(), + ) + .await; + assert_eq!(version2, 2); +}