diff --git a/node/network/gossip-support/src/lib.rs b/node/network/gossip-support/src/lib.rs index c01c816dafcd..a12fbf82d236 100644 --- a/node/network/gossip-support/src/lib.rs +++ b/node/network/gossip-support/src/lib.rs @@ -49,7 +49,7 @@ use polkadot_node_subsystem::{ RuntimeApiRequest, }, overseer, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, SubsystemContext, - SubsystemError, + SubsystemError, SubsystemSender, }; use polkadot_node_subsystem_util as util; use polkadot_primitives::v1::{AuthorityDiscoveryId, Hash, SessionIndex}; @@ -57,6 +57,10 @@ use polkadot_primitives::v1::{AuthorityDiscoveryId, Hash, SessionIndex}; #[cfg(test)] mod tests; +mod metrics; + +use metrics::Metrics; + const LOG_TARGET: &str = "parachain::gossip-support"; // How much time should we wait to reissue a connection request // since the last authority discovery resolution failure. @@ -104,6 +108,9 @@ pub struct GossipSupport { connected_authorities_by_peer_id: HashMap>, /// Authority discovery service. authority_discovery: AD, + + /// Subsystem metrics. + metrics: Metrics, } impl GossipSupport @@ -111,7 +118,11 @@ where AD: AuthorityDiscovery, { /// Create a new instance of the [`GossipSupport`] subsystem. - pub fn new(keystore: SyncCryptoStorePtr, authority_discovery: AD) -> Self { + pub fn new(keystore: SyncCryptoStorePtr, authority_discovery: AD, metrics: Metrics) -> Self { + // Initialize metrics to `0`. 
+ metrics.on_is_not_authority(); + metrics.on_is_not_parachain_validator(); + Self { keystore, last_session_index: None, @@ -121,6 +132,7 @@ where connected_authorities: HashMap::new(), connected_authorities_by_peer_id: HashMap::new(), authority_discovery, + metrics, } } @@ -223,10 +235,60 @@ where if is_new_session { update_gossip_topology(ctx, our_index, all_authorities, relay_parent).await?; + self.update_authority_status_metrics(leaf, ctx.sender()).await?; } } } + Ok(()) + } + async fn update_authority_status_metrics( + &mut self, + leaf: Hash, + sender: &mut impl SubsystemSender, + ) -> Result<(), util::Error> { + if let Some(session_info) = util::request_session_info( + leaf, + self.last_session_index + .expect("Last session index is always set on every session index change"), + sender, + ) + .await + .await?? + { + let maybe_index = match ensure_i_am_an_authority( + &self.keystore, + &session_info.discovery_keys, + ) + .await + { + Ok(index) => { + self.metrics.on_is_authority(); + Some(index) + }, + Err(util::Error::NotAValidator) => { + self.metrics.on_is_not_authority(); + self.metrics.on_is_not_parachain_validator(); + None + }, + // Don't update on runtime errors. + Err(_) => None, + }; + + if let Some(validator_index) = maybe_index { + // The subset of authorities participating in parachain consensus. + let parachain_validators_this_session = session_info.validators; + + // First `maxValidators` entries are the parachain validators. We'll check + // if our index is in this set to avoid searching for the keys. 
+ // https://github.com/paritytech/polkadot/blob/a52dca2be7840b23c19c153cf7e110b1e3e475f8/runtime/parachains/src/configuration.rs#L148 + if validator_index < parachain_validators_this_session.len() { + self.metrics.on_is_parachain_validator(); + } else { + self.metrics.on_is_not_parachain_validator(); + } + } + } Ok(()) } diff --git a/node/network/gossip-support/src/metrics.rs b/node/network/gossip-support/src/metrics.rs new file mode 100644 index 000000000000..67aa258921d8 --- /dev/null +++ b/node/network/gossip-support/src/metrics.rs @@ -0,0 +1,90 @@ +// Copyright 2021 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see <http://www.gnu.org/licenses/>. + +use polkadot_node_subsystem_util::{ + metrics, + metrics::{ + prometheus, + prometheus::{Gauge, PrometheusError, Registry, U64}, + }, +}; + +/// Gossip support metrics. +#[derive(Clone, Default)] +pub struct Metrics(Option); + +#[derive(Clone)] +struct MetricsInner { + /// Tracks authority status for producing relay chain blocks. + is_authority: Gauge, + /// Tracks authority status for parachain approval checking. + is_parachain_validator: Gauge, +} + +impl Metrics { + /// Dummy constructor for testing. + #[cfg(test)] + pub fn new_dummy() -> Self { + Self(None) + } + + /// Mark this node as a relay chain authority: sets the `is_authority` gauge to `1`. 
+ pub fn on_is_authority(&self) { + if let Some(metrics) = &self.0 { + metrics.is_authority.set(1); + } + } + + /// Mark this node as not a relay chain authority: sets the `is_authority` gauge to `0`. + pub fn on_is_not_authority(&self) { + if let Some(metrics) = &self.0 { + metrics.is_authority.set(0); + } + } + + /// Mark this node as a parachain validator: sets the `is_parachain_validator` gauge to `1`. + pub fn on_is_parachain_validator(&self) { + if let Some(metrics) = &self.0 { + metrics.is_parachain_validator.set(1); + } + } + + /// Mark this node as not a parachain validator: sets the `is_parachain_validator` gauge to `0`. + pub fn on_is_not_parachain_validator(&self) { + if let Some(metrics) = &self.0 { + metrics.is_parachain_validator.set(0); + } + } +} + +impl metrics::Metrics for Metrics { + fn try_register(registry: &Registry) -> Result { + let metrics = MetricsInner { + is_authority: prometheus::register( + Gauge::new("polkadot_node_is_authority", "Tracks the node authority status across sessions. \ + An authority is any node that is a potential block producer in a session.")?, + registry, + )?, + is_parachain_validator: prometheus::register( + Gauge::new("polkadot_node_is_parachain_validator", + "Tracks the node parachain validator status across sessions. 
Parachain validators are a \ + subset of authorities that perform approval checking of all parachain candidates in a session.")?, + registry, + )?, + }; + Ok(Metrics(Some(metrics))) + } +} diff --git a/node/network/gossip-support/src/tests.rs b/node/network/gossip-support/src/tests.rs index a3267a1daa11..5009742e8631 100644 --- a/node/network/gossip-support/src/tests.rs +++ b/node/network/gossip-support/src/tests.rs @@ -126,7 +126,11 @@ async fn get_other_authorities_addrs_map() -> HashMap GossipSupport { - GossipSupport::new(make_ferdie_keystore(), MOCK_AUTHORITY_DISCOVERY.clone()) + GossipSupport::new( + make_ferdie_keystore(), + MOCK_AUTHORITY_DISCOVERY.clone(), + Metrics::new_dummy(), + ) } fn test_harness, AD: AuthorityDiscovery>( @@ -230,6 +234,7 @@ fn issues_a_connection_request_on_new_session() { tx.send(Ok(1)).unwrap(); } ); + assert_matches!( overseer_recv(overseer).await, AllMessages::RuntimeApi(RuntimeApiMessage::Request( @@ -254,6 +259,17 @@ fn issues_a_connection_request_on_new_session() { test_neighbors(overseer).await; + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo(1, sender), + )) => { + assert_eq!(relay_parent, hash); + sender.send(Ok(None)).unwrap(); + } + ); + virtual_overseer }); @@ -296,6 +312,7 @@ fn issues_a_connection_request_on_new_session() { tx.send(Ok(2)).unwrap(); } ); + assert_matches!( overseer_recv(overseer).await, AllMessages::RuntimeApi(RuntimeApiMessage::Request( @@ -320,6 +337,17 @@ fn issues_a_connection_request_on_new_session() { test_neighbors(overseer).await; + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo(2, sender), + )) => { + assert_eq!(relay_parent, hash); + sender.send(Ok(None)).unwrap(); + } + ); + virtual_overseer }); assert_eq!(state.last_session_index, Some(2)); @@ -378,6 +406,7 @@ fn 
issues_a_connection_request_when_last_request_was_mostly_unresolved() { tx.send(Ok(1)).unwrap(); } ); + assert_matches!( overseer_recv(overseer).await, AllMessages::RuntimeApi(RuntimeApiMessage::Request( @@ -406,6 +435,17 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() { test_neighbors(overseer).await; + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo(1, sender), + )) => { + assert_eq!(relay_parent, hash); + sender.send(Ok(None)).unwrap(); + } + ); + virtual_overseer }) }; diff --git a/node/service/src/overseer.rs b/node/service/src/overseer.rs index bbdc7692fc52..58be883fa199 100644 --- a/node/service/src/overseer.rs +++ b/node/service/src/overseer.rs @@ -264,6 +264,7 @@ where .gossip_support(GossipSupportSubsystem::new( keystore.clone(), authority_discovery_service.clone(), + Metrics::register(registry)?, )) .dispute_coordinator(if disputes_enabled { DisputeCoordinatorSubsystem::new(