Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Add node authority status metric #4699

Merged
merged 21 commits into from
Jan 13, 2022
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 63 additions & 2 deletions node/network/gossip-support/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,18 @@ use polkadot_node_subsystem::{
RuntimeApiRequest,
},
overseer, ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, SubsystemContext,
SubsystemError,
SubsystemError, SubsystemSender,
};
use polkadot_node_subsystem_util as util;
use polkadot_primitives::v1::{AuthorityDiscoveryId, Hash, SessionIndex};

#[cfg(test)]
mod tests;

mod metrics;

use metrics::Metrics;

const LOG_TARGET: &str = "parachain::gossip-support";
// How much time should we wait to reissue a connection request
// since the last authority discovery resolution failure.
Expand Down Expand Up @@ -104,14 +108,21 @@ pub struct GossipSupport<AD> {
connected_authorities_by_peer_id: HashMap<PeerId, HashSet<AuthorityDiscoveryId>>,
/// Authority discovery service.
authority_discovery: AD,

/// Subsystem metrics.
metrics: Metrics,
}

impl<AD> GossipSupport<AD>
where
AD: AuthorityDiscovery,
{
/// Create a new instance of the [`GossipSupport`] subsystem.
pub fn new(keystore: SyncCryptoStorePtr, authority_discovery: AD) -> Self {
pub fn new(keystore: SyncCryptoStorePtr, authority_discovery: AD, metrics: Metrics) -> Self {
// Initialize metrics to `0`.
metrics.on_is_not_authority();
metrics.on_is_not_parachain_validator();

Self {
keystore,
last_session_index: None,
Expand All @@ -121,6 +132,7 @@ where
connected_authorities: HashMap::new(),
connected_authorities_by_peer_id: HashMap::new(),
authority_discovery,
metrics,
}
}

Expand Down Expand Up @@ -223,10 +235,59 @@ where

if is_new_session {
update_gossip_topology(ctx, our_index, all_authorities, relay_parent).await?;
self.update_authority_status_metrics(leaf, ctx.sender()).await?;
}
}
}
Ok(())
}

/// Refresh the authority-status gauges for the session stored in `self.last_session_index`.
///
/// Requests the session info for that session at `leaf` through `sender`, then asks
/// `ensure_i_am_an_authority` whether one of our keystore keys is among the session's
/// discovery keys:
///
/// * `Ok(index)`: the node is an authority. It is additionally flagged as a parachain
///   validator only if `index` falls inside `session_info.validators`.
/// * `Err(util::Error::NotAValidator)`: the node is not an authority. Both gauges are
///   reset, because parachain validators are a subset of the authorities; otherwise the
///   parachain validator gauge would keep a stale value from an earlier session.
/// * any other error, or no session info available: the gauges are left untouched.
///
/// # Errors
///
/// Propagates any failure of the session info request.
///
/// # Panics
///
/// Panics if `self.last_session_index` is `None`.
async fn update_authority_status_metrics(
    &mut self,
    leaf: Hash,
    sender: &mut impl SubsystemSender,
) -> Result<(), util::Error> {
    let session_index = self
        .last_session_index
        .expect("Last session index is always set on every session index change");

    let session_info =
        match util::request_session_info(leaf, session_index, sender).await.await?? {
            Some(session_info) => session_info,
            // Nothing known about this session, so there is nothing to report.
            None => return Ok(()),
        };

    match ensure_i_am_an_authority(&self.keystore, &session_info.discovery_keys).await {
        Ok(validator_index) => {
            self.metrics.on_is_authority();

            // `session_info.validators` is the subset of authorities participating in
            // parachain consensus.
            //
            // First `maxValidators` entries are the parachain validators. We'll check
            // if our index is in this set to avoid searching for the keys.
            // https://github.com/paritytech/polkadot/blob/a52dca2be7840b23c19c153cf7e110b1e3e475f8/runtime/parachains/src/configuration.rs#L148
            if validator_index < session_info.validators.len() {
                self.metrics.on_is_parachain_validator();
            } else {
                self.metrics.on_is_not_parachain_validator();
            }
        },
        Err(util::Error::NotAValidator) => {
            self.metrics.on_is_not_authority();
            // A node that is not an authority cannot be a parachain validator either;
            // clear the gauge so it does not carry over from a previous session.
            self.metrics.on_is_not_parachain_validator();
        },
        // Don't update on runtime errors.
        Err(_) => {},
    }

    Ok(())
}

Expand Down
90 changes: 90 additions & 0 deletions node/network/gossip-support/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright 2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

use polkadot_node_subsystem_util::{
metrics,
metrics::{
prometheus,
prometheus::{Gauge, PrometheusError, Registry, U64},
},
};

/// Gossip support subsystem metrics.
///
/// Wraps an optional set of gauges; when the inner value is `None` every `on_*` method
/// is a no-op.
#[derive(Clone, Default)]
pub struct Metrics(Option<MetricsInner>);

/// The gauges held by [`Metrics`] once it has been registered.
#[derive(Clone)]
struct MetricsInner {
/// Tracks authority status for producing relay chain blocks (set to `1` or `0`).
is_authority: Gauge<U64>,
/// Tracks authority status for parachain approval checking (set to `1` or `0`).
is_parachain_validator: Gauge<U64>,
sandreim marked this conversation as resolved.
Show resolved Hide resolved
}

impl Metrics {
/// Dummy constructor for testing.
///
/// Holds no gauges, so every `on_*` call on the returned value does nothing.
#[cfg(test)]
pub fn new_dummy() -> Self {
Self(None)
}

/// Set the `is_authority` gauge to `1` (the `relaychain validator` metric).
pub fn on_is_authority(&self) {
ordian marked this conversation as resolved.
Show resolved Hide resolved
if let Some(metrics) = &self.0 {
metrics.is_authority.set(1);
}
}

/// Reset the `is_authority` gauge to `0` (the `relaychain validator` metric).
pub fn on_is_not_authority(&self) {
sandreim marked this conversation as resolved.
Show resolved Hide resolved
if let Some(metrics) = &self.0 {
metrics.is_authority.set(0);
}
}

/// Set the `is_parachain_validator` gauge to `1` (the `parachain validator` metric).
pub fn on_is_parachain_validator(&self) {
if let Some(metrics) = &self.0 {
metrics.is_parachain_validator.set(1);
}
}

/// Reset the `is_parachain_validator` gauge to `0` (the `parachain validator` metric).
pub fn on_is_not_parachain_validator(&self) {
if let Some(metrics) = &self.0 {
metrics.is_parachain_validator.set(0);
}
}
}

impl metrics::Metrics for Metrics {
    /// Create both gauges and register them with `registry`.
    ///
    /// Fails with the underlying `PrometheusError` if a gauge cannot be created or
    /// registered; the authority gauge is handled first.
    fn try_register(registry: &Registry) -> Result<Self, PrometheusError> {
        let authority_gauge = Gauge::new(
            "polkadot_node_is_authority",
            "Tracks the node authority status across sessions. \
            An authority is any node that is a potential block producer in a session.",
        )?;
        let is_authority = prometheus::register(authority_gauge, registry)?;

        let parachain_validator_gauge = Gauge::new(
            "polkadot_node_is_parachain_validator",
            "Tracks the node parachain validator status across sessions. Parachain validators are a \
            subset of authorities that perform approval checking of all parachain candidates in a session.",
        )?;
        let is_parachain_validator = prometheus::register(parachain_validator_gauge, registry)?;

        Ok(Self(Some(MetricsInner { is_authority, is_parachain_validator })))
    }
}
42 changes: 41 additions & 1 deletion node/network/gossip-support/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,11 @@ async fn get_other_authorities_addrs_map() -> HashMap<AuthorityDiscoveryId, Hash
}

/// Build the `GossipSupport` subsystem under test from the Ferdie keystore, a clone of
/// the shared mock authority discovery and dummy metrics.
fn make_subsystem() -> GossipSupport<MockAuthorityDiscovery> {
GossipSupport::new(make_ferdie_keystore(), MOCK_AUTHORITY_DISCOVERY.clone())
GossipSupport::new(
make_ferdie_keystore(),
MOCK_AUTHORITY_DISCOVERY.clone(),
Metrics::new_dummy(),
)
}

fn test_harness<T: Future<Output = VirtualOverseer>, AD: AuthorityDiscovery>(
Expand Down Expand Up @@ -230,6 +234,7 @@ fn issues_a_connection_request_on_new_session() {
tx.send(Ok(1)).unwrap();
}
);

assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
Expand All @@ -254,6 +259,17 @@ fn issues_a_connection_request_on_new_session() {

test_neighbors(overseer).await;

assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::SessionInfo(1, sender),
)) => {
assert_eq!(relay_parent, hash);
sender.send(Ok(Some(polkadot_primitives::v2::SessionInfo::default()))).unwrap();
}
);

virtual_overseer
});

Expand Down Expand Up @@ -296,6 +312,7 @@ fn issues_a_connection_request_on_new_session() {
tx.send(Ok(2)).unwrap();
}
);

assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
Expand All @@ -320,6 +337,17 @@ fn issues_a_connection_request_on_new_session() {

test_neighbors(overseer).await;

assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::SessionInfo(2, sender),
)) => {
assert_eq!(relay_parent, hash);
sender.send(Ok(Some(polkadot_primitives::v2::SessionInfo::default()))).unwrap();
}
);

virtual_overseer
});
assert_eq!(state.last_session_index, Some(2));
Expand Down Expand Up @@ -378,6 +406,7 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() {
tx.send(Ok(1)).unwrap();
}
);

assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
Expand Down Expand Up @@ -406,6 +435,17 @@ fn issues_a_connection_request_when_last_request_was_mostly_unresolved() {

test_neighbors(overseer).await;

assert_matches!(
overseer_recv(overseer).await,
AllMessages::RuntimeApi(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::SessionInfo(1, sender),
)) => {
assert_eq!(relay_parent, hash);
sender.send(Ok(Some(polkadot_primitives::v2::SessionInfo::default()))).unwrap();
}
);

virtual_overseer
})
};
Expand Down
1 change: 1 addition & 0 deletions node/service/src/overseer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ where
.gossip_support(GossipSupportSubsystem::new(
keystore.clone(),
authority_discovery_service.clone(),
Metrics::register(registry)?,
))
.dispute_coordinator(if disputes_enabled {
DisputeCoordinatorSubsystem::new(
Expand Down
3 changes: 2 additions & 1 deletion primitives/src/v2/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ use parity_util_mem::MallocSizeOf;

/// Information about validator sets of a session.
#[derive(Clone, Encode, Decode, RuntimeDebug, TypeInfo)]
#[cfg_attr(feature = "std", derive(PartialEq, MallocSizeOf))]
#[cfg_attr(feature = "std", derive(PartialEq, MallocSizeOf, Default))]
drahnr marked this conversation as resolved.
Show resolved Hide resolved
#[cfg_attr(all(feature = "std", test), derive(Default))]
sandreim marked this conversation as resolved.
Show resolved Hide resolved
pub struct SessionInfo {
/****** New in v2 *******/
/// All the validators actively participating in parachain consensus.
Expand Down