From 5465f1bc58e9b17a9d5e0a917d3432644c6bec1d Mon Sep 17 00:00:00 2001 From: Ivan Druzhitskiy Date: Thu, 7 Sep 2023 16:18:00 +0300 Subject: [PATCH 1/5] make ping parallel --- bob/src/link_manager.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/bob/src/link_manager.rs b/bob/src/link_manager.rs index d8f6e29e5..b2f1b6bab 100644 --- a/bob/src/link_manager.rs +++ b/bob/src/link_manager.rs @@ -57,21 +57,22 @@ impl LinkManager { interval.tick().await; let mut err_cnt = 0; let mut status = String::from("Node status: "); - for node in nodes.iter() { - if let Err(e) = node.check(&factory).await { + let mut futures :FuturesUnordered<_> = nodes.iter().map(|n| n.check(&factory) .map(|r| (n.name().clone(), r))).collect(); + while let Some((name, res)) = futures.next().await { + if let Err(e) = res { if log_in_this_iter { error!( "No connection to {}:[{}] - {}", - node.name(), - node.address(), + name, + nodes.iter().find(|n| n.name() == &name).unwrap().address(), e ); - status += &format!("[-]{:<10} ", node.name()); + status += &format!("[-]{:<10} ", name); } err_cnt += 1; } else { if log_in_this_iter { - status += &format!("[+]{:<10} ", node.name()); + status += &format!("[+]{:<10} ", name); } } } From 822517c877f2228787136374e4942db003a50637 Mon Sep 17 00:00:00 2001 From: Ivan Druzhitskiy Date: Thu, 7 Sep 2023 16:42:05 +0300 Subject: [PATCH 2/5] add check timeout --- bob-apps/bin/bobd.rs | 6 +++++- bob-backend/src/pearl/tests.rs | 1 + bob-common/src/bob_client.rs | 26 ++++++++++++++++++------- bob-common/src/configs/cluster_tests.rs | 13 +++++++++++++ bob-common/src/configs/node.rs | 15 ++++++++++++++ 5 files changed, 53 insertions(+), 8 deletions(-) diff --git a/bob-apps/bin/bobd.rs b/bob-apps/bin/bobd.rs index 5167c47b9..cefe4082c 100755 --- a/bob-apps/bin/bobd.rs +++ b/bob-apps/bin/bobd.rs @@ -245,7 +245,11 @@ async fn run_server(node: NodeConfig, authenticator: A, mapper tls_domain_name: tls_config.domain_name.clone(), } }); - let factory = Factory::new(node.operation_timeout(), metrics, node.name().into(), factory_tls_config); + let factory = Factory::new(node.operation_timeout(), + node.check_timeout(), + metrics, + node.name().into(), + factory_tls_config); let mut server_builder = Server::builder(); if let Some(node_tls_config) = node.tls_config().as_ref().and_then(|tls_config| tls_config.grpc_config()) { diff --git a/bob-backend/src/pearl/tests.rs b/bob-backend/src/pearl/tests.rs index c6a33e230..585c1344d 100755 --- a/bob-backend/src/pearl/tests.rs +++ b/bob-backend/src/pearl/tests.rs @@ -32,6 +32,7 @@ users_config: users.yaml name: local_node quorum: 1 operation_timeout: 3sec +check_timeout: 3sec check_interval: 5000ms cluster_policy: quorum # quorum backend_type: pearl # in_memory, stub, pearl diff --git a/bob-common/src/bob_client.rs b/bob-common/src/bob_client.rs index 54deb7eb8..522fa933d 100644 --- a/bob-common/src/bob_client.rs +++ b/bob-common/src/bob_client.rs @@ -31,6 +31,7 @@ pub mod b_client { local_node_name: NodeName, operation_timeout: Duration, + ping_timeout: Duration, auth_header: String, metrics: BobClientMetrics, } @@ -42,6 +43,7 @@ pub mod b_client { pub async fn create( node: &Node, operation_timeout: Duration, + ping_timeout: Duration, metrics: BobClientMetrics, local_node_name: NodeName, tls_config: Option<&FactoryTlsConfig>, @@ -66,11 +68,12 @@ pub mod b_client { Ok(Self { client, target_node_name: node.name().clone(), - target_node_address: node.address().to_owned(), - local_node_name: local_node_name, - operation_timeout: operation_timeout, - auth_header: auth_header, - metrics: metrics + target_node_address: node.address().to_owned(), + local_node_name, + operation_timeout, + ping_timeout, + auth_header, + metrics }) } @@ -154,7 +157,7 @@ pub mod b_client { let mut req = Request::new(Null {}); self.set_credentials(&mut req); self.set_node_name(&mut req); - self.set_timeout(&mut req); + self.set_ping_timeout(&mut req); let node_name = self.target_node_name.to_owned(); let mut client = self.client.clone(); @@ -242,11 +245,16 @@ pub mod b_client { fn set_timeout(&self, r: &mut Request) { r.set_timeout(self.operation_timeout); } + + fn set_ping_timeout(&self, r: &mut Request) { + r.set_timeout(self.ping_timeout); + } } mock! { pub BobClient { - pub async fn create<'a>(node: &Node, operation_timeout: Duration, metrics: BobClientMetrics, local_node_name: NodeName, tls_config: Option<&'a FactoryTlsConfig>) -> Result; + pub async fn create<'a>(node: &Node, operation_timeout: Duration, check_timeout: Duration, + metrics: BobClientMetrics, local_node_name: NodeName, tls_config: Option<&'a FactoryTlsConfig>) -> Result; pub async fn put(&self, key: BobKey, d: BobData, options: PutOptions) -> PutResult; pub async fn get(&self, key: BobKey, options: GetOptions) -> GetResult; pub async fn ping(&self) -> PingResult; @@ -314,6 +322,7 @@ pub struct FactoryTlsConfig { #[derive(Clone)] pub struct Factory { operation_timeout: Duration, + ping_timeout: Duration, metrics: Arc, local_node_name: NodeName, tls_config: Option, @@ -324,12 +333,14 @@ impl Factory { #[must_use] pub fn new( operation_timeout: Duration, + ping_timeout: Duration, metrics: Arc, local_node_name: NodeName, tls_config: Option, ) -> Self { Factory { operation_timeout, + ping_timeout, metrics, local_node_name, tls_config, @@ -340,6 +351,7 @@ impl Factory { BobClient::create( node, self.operation_timeout, + self.ping_timeout, metrics, self.local_node_name.clone(), self.tls_config.as_ref(), diff --git a/bob-common/src/configs/cluster_tests.rs b/bob-common/src/configs/cluster_tests.rs index 14fbc9e88..be1205b1e 100755 --- a/bob-common/src/configs/cluster_tests.rs +++ b/bob-common/src/configs/cluster_tests.rs @@ -509,6 +509,7 @@ users_config: users.yaml name: no quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100ms cluster_policy: quorum # quorum backend_type: stub @@ -526,6 +527,7 @@ users_config: users.yaml name: no quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100ms cluster_policy: quorum # quorum backend_type: pearl @@ -543,6 +545,7 @@ users_config: users.yaml name: no quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100ms cluster_policy: quorum # quorum backend_type: pearl @@ -571,6 +574,7 @@ users_config: users.yaml name: no quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100ms cluster_policy: quorum # quorum backend_type: pearl @@ -599,6 +603,7 @@ users_config: users.yaml name: no quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100ms cluster_policy: quorum # quorum backend_type: pearl @@ -627,6 +632,7 @@ users_config: users.yaml name: no quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100ms cluster_policy: quorum # quorum backend_type: pearl @@ -655,6 +661,7 @@ users_config: users.yaml name: n1 quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100sec cluster_policy: quorum # quorum backend_type: InvalidType @@ -690,6 +697,7 @@ users_config: users.yaml name: no quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100mms cluster_policy: quorum # quorum backend_type: stub @@ -707,6 +715,7 @@ users_config: users.yaml name: n1 quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100sec cluster_policy: quorum # quorum backend_type: stub @@ -741,6 +750,7 @@ users_config: users.yaml name: 1n2112321321321321 quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100sec cluster_policy: quorum # quorum backend_type: stub @@ -775,6 +785,7 @@ users_config: users.yaml name: no quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100ms cluster_policy: quorum # quorum backend_type: stub @@ -798,6 +809,7 @@ users_config: users.yaml name: no quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100ms cluster_policy: quorum # quorum backend_type: stub @@ -820,6 +832,7 @@ users_config: users.yaml name: no quorum: 1 operation_timeout: 12h 5min 2ns +check_timeout: 12h 5min 2ns check_interval: 100ms cluster_policy: quorum # quorum backend_type: stub diff --git a/bob-common/src/configs/node.rs b/bob-common/src/configs/node.rs index b6033b797..3173983ac 100755 --- a/bob-common/src/configs/node.rs +++ b/bob-common/src/configs/node.rs @@ -556,6 +556,7 @@ pub struct Node { name: String, quorum: usize, operation_timeout: String, + check_timeout: String, check_interval: String, #[serde(default = "NodeConfig::default_count_interval")] count_interval: String, @@ -657,6 +658,14 @@ impl NodeConfig { .into() } + /// Get check request operation timeout, parsed from humantime format. + pub fn check_timeout(&self) -> Duration { + self.check_timeout + .parse::() + .expect("parse humantime duration") + .into() + } + pub fn check_interval(&self) -> Duration { self.check_interval .parse::() @@ -806,6 +815,7 @@ impl NodeConfig { Self::check_unset_single(&self.users_config, "users_config")?; Self::check_unset_single(&self.name, "name")?; Self::check_unset_single(&self.operation_timeout, "operation_timeout")?; + Self::check_unset_single(&self.check_timeout, "check_timeout")?; Self::check_unset_single(&self.backend_type, "backend_type")?; Ok(()) } @@ -850,6 +860,7 @@ impl NodeConfig { name: String::from(node_name), quorum: 1, operation_timeout: String::from("60sec"), + check_timeout: String::from("5sec"), check_interval: String::from("5000ms"), count_interval: NodeConfig::default_count_interval(), cluster_policy: String::from("quorum"), @@ -890,6 +901,9 @@ impl Validatable for NodeConfig { self.operation_timeout.parse::().map_err(|e| { format!("field 'timeout' for 'config' is not valid: {}", e) })?; + self.check_timeout.parse::().map_err(|e| { + format!("field 'timeout' for 'config' is not valid: {}", e) + })?; self.check_interval.parse::().map_err(|e| { format!("field 'check_interval' for 'config' is not valid: {}", e) })?; @@ -924,6 +938,7 @@ pub mod tests { name: name.to_string(), quorum, operation_timeout: "3sec".to_string(), + check_timeout: "1sec".to_string(), check_interval: "3sec".to_string(), cluster_policy: "quorum".to_string(), backend_type: "in_memory".to_string(), From 6e49c6d239415eb7f0a3b593ebf1fb5065592b97 Mon Sep 17 00:00:00 2001 From: Ivan Druzhitskiy Date: Thu, 7 Sep 2023 16:42:27 +0300 Subject: [PATCH 3/5] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1313d545..03eee2071 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Bob versions changelog - Added mimalloc allocator for musl target (#688) - Added jemalloc-profile for memory profiling (#797) - Proper support for GetSource::ALL requests (#723) +- Add check timeout config option (#805) #### Changed - BobClient clone overhead reduced (#774) From 4754114ade286a50fc75eabb0099ff9c82fceec7 Mon Sep 17 00:00:00 2001 From: Ivan Druzhitskiy Date: Thu, 7 Sep 2023 16:45:03 +0300 Subject: [PATCH 4/5] update config files --- compose_examples/one_node/configs/node.yaml | 1 + compose_examples/two_nodes/configs/node_0.yaml | 1 + compose_examples/two_nodes/configs/node_1.yaml | 1 + compose_examples/two_nodes_swarm/configs/node0.yaml | 1 + compose_examples/two_nodes_swarm/configs/node1.yaml | 1 + compose_examples/two_nodes_swarm/configs/node2.yaml | 1 + compose_examples/two_nodes_swarm/configs/node3.yaml | 1 + config-examples/node.yaml | 2 ++ dockerfiles/default-configs/node.yaml | 2 ++ test_env/node1.yaml | 3 ++- test_env/node2.yaml | 3 ++- test_env/node3.yaml | 3 ++- test_env/node4.yaml | 3 ++- 13 files changed, 19 insertions(+), 4 deletions(-) diff --git a/compose_examples/one_node/configs/node.yaml b/compose_examples/one_node/configs/node.yaml index f54e82043..2834ff1cb 100644 --- a/compose_examples/one_node/configs/node.yaml +++ b/compose_examples/one_node/configs/node.yaml @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml name: node quorum: 1 operation_timeout: 15sec +check_timeout: 3sec check_interval: 5000ms cluster_policy: quorum backend_type: pearl diff --git a/compose_examples/two_nodes/configs/node_0.yaml b/compose_examples/two_nodes/configs/node_0.yaml index 973673e3b..6349d5eaf 100644 --- a/compose_examples/two_nodes/configs/node_0.yaml +++ b/compose_examples/two_nodes/configs/node_0.yaml @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml name: node_0 quorum: 2 operation_timeout: 15sec +check_timeout: 3sec check_interval: 5000ms cluster_policy: quorum backend_type: pearl diff --git a/compose_examples/two_nodes/configs/node_1.yaml b/compose_examples/two_nodes/configs/node_1.yaml index 9f9382093..010ae7378 100644 --- a/compose_examples/two_nodes/configs/node_1.yaml +++ b/compose_examples/two_nodes/configs/node_1.yaml @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml name: node_1 quorum: 2 operation_timeout: 15sec +check_timeout: 3sec check_interval: 5000ms cluster_policy: quorum backend_type: pearl diff --git a/compose_examples/two_nodes_swarm/configs/node0.yaml b/compose_examples/two_nodes_swarm/configs/node0.yaml index 089b18655..4b8766d9c 100644 --- a/compose_examples/two_nodes_swarm/configs/node0.yaml +++ b/compose_examples/two_nodes_swarm/configs/node0.yaml @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml name: node0 quorum: 2 operation_timeout: 15sec +check_timeout: 3sec check_interval: 5000ms cleanup_interval: 60s cluster_policy: quorum diff --git a/compose_examples/two_nodes_swarm/configs/node1.yaml b/compose_examples/two_nodes_swarm/configs/node1.yaml index deff57986..8cd19b43b 100644 --- a/compose_examples/two_nodes_swarm/configs/node1.yaml +++ b/compose_examples/two_nodes_swarm/configs/node1.yaml @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml name: node1 quorum: 2 operation_timeout: 15sec +check_timeout: 3sec check_interval: 5000ms cleanup_interval: 60s cluster_policy: quorum diff --git a/compose_examples/two_nodes_swarm/configs/node2.yaml b/compose_examples/two_nodes_swarm/configs/node2.yaml index 6a86beda3..8ff48121a 100644 --- a/compose_examples/two_nodes_swarm/configs/node2.yaml +++ b/compose_examples/two_nodes_swarm/configs/node2.yaml @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml name: node2 quorum: 2 operation_timeout: 15sec +check_timeout: 3sec check_interval: 5000ms cleanup_interval: 60s cluster_policy: quorum diff --git a/compose_examples/two_nodes_swarm/configs/node3.yaml b/compose_examples/two_nodes_swarm/configs/node3.yaml index 20801182a..a432490bf 100644 --- a/compose_examples/two_nodes_swarm/configs/node3.yaml +++ b/compose_examples/two_nodes_swarm/configs/node3.yaml @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml name: node3 quorum: 2 operation_timeout: 15sec +check_timeout: 3sec check_interval: 5000ms cleanup_interval: 60s cluster_policy: quorum diff --git a/config-examples/node.yaml b/config-examples/node.yaml index d0ecf634d..64cd580f7 100644 --- a/config-examples/node.yaml +++ b/config-examples/node.yaml @@ -39,6 +39,8 @@ name: local_node quorum: 1 # [time] timeout for every GRPC operation operation_timeout: 3sec +# [time] timeout for node check operation +check_timeout: 3sec # [time] interval for checking connections check_interval: 5000ms # [simple, quorum] simple - without checking status diff --git a/dockerfiles/default-configs/node.yaml b/dockerfiles/default-configs/node.yaml index 1fc79e1be..53cf4fb61 100644 --- a/dockerfiles/default-configs/node.yaml +++ b/dockerfiles/default-configs/node.yaml @@ -22,6 +22,8 @@ name: local_node quorum: 1 # [time] timeout for every GRPC operation operation_timeout: 3sec +# [time] timeout for node check operation +check_timeout: 3sec # [time] interval for checking connections check_interval: 5000ms # [simple, quorum] simple - without checking status diff --git a/test_env/node1.yaml b/test_env/node1.yaml index bd7ffb342..81cf49ce5 100644 --- a/test_env/node1.yaml +++ b/test_env/node1.yaml @@ -3,6 +3,7 @@ users_config: /bob/config-examples/users.yaml name: node1 quorum: 3 operation_timeout: 3sec +check_timeout: 3sec check_interval: 5000ms cleanup_interval: 5000ms cluster_policy: quorum @@ -27,4 +28,4 @@ metrics: prefix: '{metrics_name}.{local_address}.{node_name}' graphite: 127.0.0.1:2003 graphite_enabled: false - prometheus_enabled: false \ No newline at end of file + prometheus_enabled: false diff --git a/test_env/node2.yaml b/test_env/node2.yaml index d6bf899d1..d6fa8b006 100644 --- a/test_env/node2.yaml +++ b/test_env/node2.yaml @@ -2,6 +2,7 @@ log_config: /bob/config-examples/logger.yaml name: node2 quorum: 3 operation_timeout: 3sec +check_timeout: 3sec check_interval: 5000ms cluster_policy: quorum backend_type: pearl @@ -21,4 +22,4 @@ pearl: metrics: name: bob - graphite: 127.0.0.1:2003 \ No newline at end of file + graphite: 127.0.0.1:2003 diff --git a/test_env/node3.yaml b/test_env/node3.yaml index d56f616bb..02f33a5ab 100644 --- a/test_env/node3.yaml +++ b/test_env/node3.yaml @@ -2,6 +2,7 @@ log_config: /bob/config-examples/logger.yaml name: node3 quorum: 3 operation_timeout: 3sec +check_timeout: 3sec check_interval: 5000ms cluster_policy: quorum backend_type: pearl @@ -21,4 +22,4 @@ pearl: metrics: name: bob - graphite: 127.0.0.1:2003 \ No newline at end of file + graphite: 127.0.0.1:2003 diff --git a/test_env/node4.yaml b/test_env/node4.yaml index f9560dcdd..579defe16 100644 --- a/test_env/node4.yaml +++ b/test_env/node4.yaml @@ -2,6 +2,7 @@ log_config: /bob/config-examples/logger.yaml name: node4 quorum: 3 operation_timeout: 3sec +check_timeout: 3sec check_interval: 5000ms cluster_policy: quorum backend_type: pearl @@ -21,4 +22,4 @@ pearl: metrics: name: bob - graphite: 127.0.0.1:2003 \ No newline at end of file + graphite: 127.0.0.1:2003 From b18cfa3868ecce1b38afa2711d31812547ad7463 Mon Sep 17 00:00:00 2001 From: Ivan Druzhitskiy Date: Thu, 7 Sep 2023 19:06:35 +0300 Subject: [PATCH 5/5] rename ping to check timeout --- bob-common/src/bob_client.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/bob-common/src/bob_client.rs b/bob-common/src/bob_client.rs index 522fa933d..3320fcefd 100644 --- a/bob-common/src/bob_client.rs +++ b/bob-common/src/bob_client.rs @@ -31,7 +31,7 @@ pub mod b_client { local_node_name: NodeName, operation_timeout: Duration, - ping_timeout: Duration, + check_timeout: Duration, auth_header: String, metrics: BobClientMetrics, } @@ -43,7 +43,7 @@ pub mod b_client { pub async fn create( node: &Node, operation_timeout: Duration, - ping_timeout: Duration, + check_timeout: Duration, metrics: BobClientMetrics, local_node_name: NodeName, tls_config: Option<&FactoryTlsConfig>, @@ -71,7 +71,7 @@ pub mod b_client { target_node_address: node.address().to_owned(), local_node_name, operation_timeout, - ping_timeout, + check_timeout, auth_header, metrics }) @@ -157,7 +157,7 @@ pub mod b_client { let mut req = Request::new(Null {}); self.set_credentials(&mut req); self.set_node_name(&mut req); - self.set_ping_timeout(&mut req); + self.set_check_timeout(&mut req); let node_name = self.target_node_name.to_owned(); let mut client = self.client.clone(); @@ -246,8 +246,8 @@ pub mod b_client { r.set_timeout(self.operation_timeout); } - fn set_ping_timeout(&self, r: &mut Request) { - r.set_timeout(self.ping_timeout); + fn set_check_timeout(&self, r: &mut Request) { + r.set_timeout(self.check_timeout); } } @@ -322,7 +322,7 @@ pub struct FactoryTlsConfig { #[derive(Clone)] pub struct Factory { operation_timeout: Duration, - ping_timeout: Duration, + check_timeout: Duration, metrics: Arc, local_node_name: NodeName, tls_config: Option, @@ -333,14 +333,14 @@ impl Factory { #[must_use] pub fn new( operation_timeout: Duration, - ping_timeout: Duration, + check_timeout: Duration, metrics: Arc, local_node_name: NodeName, tls_config: Option, ) -> Self { Factory { operation_timeout, - ping_timeout, + check_timeout, metrics, local_node_name, tls_config, @@ -351,7 +351,7 @@ impl Factory { BobClient::create( node, self.operation_timeout, - self.ping_timeout, + self.check_timeout, metrics, self.local_node_name.clone(), self.tls_config.as_ref(),