From bbe244aad2aa52a87476644299fae429639fa046 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 18 Mar 2022 15:23:34 +0000 Subject: [PATCH] Some improvements in the statement-distribution histograms (#5154) --- .../network/statement-distribution/src/lib.rs | 6 ++- .../statement-distribution/src/metrics.rs | 48 +++++++++++++------ 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/node/network/statement-distribution/src/lib.rs b/node/network/statement-distribution/src/lib.rs index 9f1eaaa5742c..e4f77f406de0 100644 --- a/node/network/statement-distribution/src/lib.rs +++ b/node/network/statement-distribution/src/lib.rs @@ -1308,6 +1308,7 @@ async fn handle_incoming_message_and_circulate<'a>( // statement before a `Seconded` statement. `Seconded` statements are the only ones // that require dependents. Thus, if this is a `Seconded` statement for a candidate we // were not aware of before, we cannot have any dependent statements from the candidate. + let _ = metrics.time_network_bridge_update_v1("circulate_statement"); let _ = circulate_statement( gossip_peers, peers, @@ -1336,6 +1337,7 @@ async fn handle_incoming_message<'a>( metrics: &Metrics, ) -> Option<(Hash, StoredStatement<'a>)> { let relay_parent = message.get_relay_parent(); + let _ = metrics.time_network_bridge_update_v1("handle_incoming_message"); let active_head = match active_heads.get_mut(&relay_parent) { Some(h) => h, @@ -1585,6 +1587,7 @@ async fn handle_network_update( } }, NetworkBridgeEvent::NewGossipTopology(new_peers) => { + let _ = metrics.time_network_bridge_update_v1("new_gossip_topology"); let newly_added: Vec = new_peers.difference(gossip_peers).cloned().collect(); *gossip_peers = new_peers; for peer in newly_added { @@ -1617,6 +1620,7 @@ async fn handle_network_update( .await; }, NetworkBridgeEvent::PeerViewChange(peer, view) => { + let _ = metrics.time_network_bridge_update_v1("peer_view_change"); gum::trace!(target: LOG_TARGET, ?peer, ?view, "Peer view change"); match peers.get_mut(&peer) { Some(data) => @@ -1976,8 +1980,6 @@ impl StatementDistributionSubsystem { .await; }, StatementDistributionMessage::NetworkBridgeUpdateV1(event) => { - let _timer = metrics.time_network_bridge_update_v1(); - handle_network_update( peers, gossip_peers, diff --git a/node/network/statement-distribution/src/metrics.rs b/node/network/statement-distribution/src/metrics.rs index a425518720fb..6bc6f724ae09 100644 --- a/node/network/statement-distribution/src/metrics.rs +++ b/node/network/statement-distribution/src/metrics.rs @@ -16,6 +16,10 @@ use polkadot_node_subsystem_util::metrics::{self, prometheus}; +/// Buckets more suitable for checking the typical latency values +const HISTOGRAM_LATENCY_BUCKETS: &[f64] = + &[0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.75, 0.9, 1.0, 1.2, 1.5, 1.75]; + #[derive(Clone)] struct MetricsInner { statements_distributed: prometheus::Counter, @@ -23,7 +27,7 @@ struct MetricsInner { received_responses: prometheus::CounterVec, active_leaves_update: prometheus::Histogram, share: prometheus::Histogram, - network_bridge_update_v1: prometheus::Histogram, + network_bridge_update_v1: prometheus::HistogramVec, statements_unexpected: prometheus::CounterVec, created_message_size: prometheus::Gauge, } @@ -74,8 +78,14 @@ impl Metrics { /// Provide a timer for `network_bridge_update_v1` which observes on drop. pub fn time_network_bridge_update_v1( &self, + message_type: &'static str, ) -> Option { - self.0.as_ref().map(|metrics| metrics.network_bridge_update_v1.start_timer()) + self.0.as_ref().map(|metrics| { + metrics + .network_bridge_update_v1 + .with_label_values(&[message_type]) + .start_timer() + }) } /// Update the out-of-view statements counter for unexpected valid statements @@ -137,24 +147,34 @@ impl metrics::Metrics for Metrics { registry, )?, active_leaves_update: prometheus::register( - prometheus::Histogram::with_opts(prometheus::HistogramOpts::new( - "polkadot_parachain_statement_distribution_active_leaves_update", - "Time spent within `statement_distribution::active_leaves_update`", - ))?, + prometheus::Histogram::with_opts( + prometheus::HistogramOpts::new( + "polkadot_parachain_statement_distribution_active_leaves_update", + "Time spent within `statement_distribution::active_leaves_update`", + ) + .buckets(HISTOGRAM_LATENCY_BUCKETS.into()), + )?, registry, )?, share: prometheus::register( - prometheus::Histogram::with_opts(prometheus::HistogramOpts::new( - "polkadot_parachain_statement_distribution_share", - "Time spent within `statement_distribution::share`", - ))?, + prometheus::Histogram::with_opts( + prometheus::HistogramOpts::new( + "polkadot_parachain_statement_distribution_share", + "Time spent within `statement_distribution::share`", + ) + .buckets(HISTOGRAM_LATENCY_BUCKETS.into()), + )?, registry, )?, network_bridge_update_v1: prometheus::register( - prometheus::Histogram::with_opts(prometheus::HistogramOpts::new( - "polkadot_parachain_statement_distribution_network_bridge_update_v1", - "Time spent within `statement_distribution::network_bridge_update_v1`", - ))?, + prometheus::HistogramVec::new( + prometheus::HistogramOpts::new( + "polkadot_parachain_statement_distribution_network_bridge_update_v1", + "Time spent within `statement_distribution::network_bridge_update_v1`", + ) + .buckets(HISTOGRAM_LATENCY_BUCKETS.into()), + &["message_type"], + )?, registry, )?, statements_unexpected: prometheus::register(