Skip to content

Commit

Permalink
add more metrics of syncing (#1467)
Browse files Browse the repository at this point in the history
* add more metrics of syncing

* add metric register tests
  • Loading branch information
LesnyRumcajs authored Mar 11, 2022
1 parent e708777 commit 7783ef8
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 0 deletions.
4 changes: 4 additions & 0 deletions blockchain/chain_sync/src/chain_muxer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,7 @@ where
"Evaluating the network head failed, retrying. Error = {:?}",
why
);
metrics::NETWORK_HEAD_EVALUATION_ERRORS.inc();
self.state = ChainMuxerState::Idle;
}
Poll::Pending => return Poll::Pending,
Expand All @@ -849,6 +850,7 @@ where
Poll::Ready(Err(why)) => {
// TODO: Should we exponentially back off before retrying?
error!("Bootstrapping failed, re-evaluating the network head to retry the bootstrap. Error = {:?}", why);
metrics::BOOTSTRAP_ERRORS.inc();
self.state = ChainMuxerState::Idle;
}
Poll::Pending => return Poll::Pending,
Expand All @@ -857,10 +859,12 @@ where
ChainMuxerState::Follow(ref mut follow) => match follow.as_mut().poll(cx) {
Poll::Ready(Ok(_)) => {
error!("Following the network unexpectedly ended without an error; restarting the sync process.");
metrics::FOLLOW_NETWORK_INTERRUPTIONS.inc();
self.state = ChainMuxerState::Idle;
}
Poll::Ready(Err(why)) => {
error!("Following the network failed, restarted. Error = {:?}", why);
metrics::FOLLOW_NETWORK_ERRORS.inc();
self.state = ChainMuxerState::Idle;
}
Poll::Pending => return Poll::Pending,
Expand Down
107 changes: 107 additions & 0 deletions blockchain/chain_sync/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,79 @@ lazy_static! {
.expect("Registering the bad_peers metric with the metrics registry must succeed");
bad_peers
};
/// Gauge exported as `last_validated_tipset_epoch` ("Last validated tipset epoch").
///
/// The gauge is created and registered with the default Prometheus registry when this
/// static is first dereferenced.
///
/// # Panics
///
/// Panics on first access if the gauge cannot be defined or cannot be registered.
pub static ref LAST_VALIDATED_TIPSET_EPOCH: Box<GenericGauge<AtomicU64>> = {
    let gauge = GenericGauge::<AtomicU64>::new(
        "last_validated_tipset_epoch",
        "Last validated tipset epoch",
    )
    .expect("Defining the last_validated_tipset_epoch metric must succeed");
    let gauge = Box::new(gauge);

    // The registry takes its own boxed handle; the static keeps the other one.
    let registry = prometheus::default_registry();
    registry
        .register(gauge.clone())
        .expect("Registering the last_validated_tipset_epoch metric with the metrics registry must succeed");

    gauge
};
/// Counter exported as `network_head_evaluation_errors`
/// ("Total number of network head evaluation errors").
///
/// The counter is created and registered with the default Prometheus registry when this
/// static is first dereferenced.
///
/// # Panics
///
/// Panics on first access if the counter cannot be defined or cannot be registered.
pub static ref NETWORK_HEAD_EVALUATION_ERRORS: Box<GenericCounter<AtomicU64>> = {
    let counter = GenericCounter::<AtomicU64>::new(
        "network_head_evaluation_errors",
        "Total number of network head evaluation errors",
    )
    .expect("Defining the network_head_evaluation_errors metric must succeed");
    let counter = Box::new(counter);

    // The registry takes its own boxed handle; the static keeps the other one.
    let registry = prometheus::default_registry();
    registry.register(counter.clone()).expect(
        "Registering the network_head_evaluation_errors metric with the metrics registry must succeed",
    );

    counter
};
/// Counter exported as `bootstrap_errors`
/// ("Total number of bootstrap attempts failures").
///
/// The counter is created and registered with the default Prometheus registry when this
/// static is first dereferenced.
///
/// # Panics
///
/// Panics on first access if the counter cannot be defined or cannot be registered.
pub static ref BOOTSTRAP_ERRORS: Box<GenericCounter<AtomicU64>> = {
    let bootstrap_errors = GenericCounter::<AtomicU64>::new(
        "bootstrap_errors",
        "Total number of bootstrap attempts failures",
    )
    .expect("Defining the bootstrap_errors metric must succeed");
    let bootstrap_errors = Box::new(bootstrap_errors);

    // The registry takes its own boxed handle; the static keeps the other one.
    let registry = prometheus::default_registry();
    registry.register(bootstrap_errors.clone()).expect(
        "Registering the bootstrap_errors metric with the metrics registry must succeed",
    );

    bootstrap_errors
};
/// Counter exported as `follow_network_interruptions`
/// ("Total number of follow network interruptions, where it unexpectedly ended").
///
/// The counter is created and registered with the default Prometheus registry when this
/// static is first dereferenced.
///
/// # Panics
///
/// Panics on first access if the counter cannot be defined or cannot be registered.
pub static ref FOLLOW_NETWORK_INTERRUPTIONS: Box<GenericCounter<AtomicU64>> = {
    let interruptions = GenericCounter::<AtomicU64>::new(
        "follow_network_interruptions",
        "Total number of follow network interruptions, where it unexpectedly ended",
    )
    .expect("Defining the follow_network_interruptions metric must succeed");
    let interruptions = Box::new(interruptions);

    // The registry takes its own boxed handle; the static keeps the other one.
    let registry = prometheus::default_registry();
    registry.register(interruptions.clone()).expect(
        "Registering the follow_network_interruptions metric with the metrics registry must succeed",
    );

    interruptions
};
/// Counter exported as `follow_network_errors`
/// ("Total number of follow network errors").
///
/// The counter is created and registered with the default Prometheus registry when this
/// static is first dereferenced.
///
/// # Panics
///
/// Panics on first access if the counter cannot be defined or cannot be registered.
pub static ref FOLLOW_NETWORK_ERRORS: Box<GenericCounter<AtomicU64>> = {
    let errors = GenericCounter::<AtomicU64>::new(
        "follow_network_errors",
        "Total number of follow network errors",
    )
    .expect("Defining the follow_network_errors metric must succeed");
    let errors = Box::new(errors);

    // The registry takes its own boxed handle; the static keeps the other one.
    let registry = prometheus::default_registry();
    registry.register(errors.clone()).expect(
        "Registering the follow_network_errors metric with the metrics registry must succeed",
    );

    errors
};
}

pub mod labels {
Expand All @@ -133,3 +206,37 @@ pub mod values {
pub const CHAIN_EXCHANGE_REQUEST: &str = "chain_exchange_request";
pub const BITSWAP_BLOCK: &str = "bitswap_block";
}

#[cfg(test)]
mod tests {
    use super::*;
    use prometheus::core::Metric;

    // Calls `metric()` on each named metric static, in the order given, and discards
    // the result. Dereferencing a lazily initialised static runs its initialiser, so
    // a metric that fails to be defined or registered makes the test panic.
    macro_rules! touch_metrics {
        ($($metric:ident),+ $(,)?) => {
            $( let _ = $metric.metric(); )+
        };
    }

    // Variant for label-keyed metric vectors: requests the child for a single
    // placeholder label value and discards it.
    macro_rules! touch_metric_vec {
        ($metric:ident) => {
            let _ = $metric.with_label_values(&["label"]);
        };
    }

    /// Smoke test: accessing every metric in this module must not panic.
    #[test]
    fn metrics_defined_and_registered() {
        touch_metrics!(TIPSET_PROCESSING_TIME);
        touch_metric_vec!(LIBP2P_MESSAGE_TOTAL);
        touch_metrics!(
            INVALID_TIPSET_TOTAL,
            TIPSET_RANGE_SYNC_FAILURE_TOTAL,
            HEAD_EPOCH,
            PEER_FAILURE_TOTAL,
            FULL_PEERS,
            BAD_PEERS,
            LAST_VALIDATED_TIPSET_EPOCH,
            NETWORK_HEAD_EVALUATION_ERRORS,
            BOOTSTRAP_ERRORS,
            FOLLOW_NETWORK_INTERRUPTIONS,
            FOLLOW_NETWORK_ERRORS,
        );
    }
}
2 changes: 2 additions & 0 deletions blockchain/chain_sync/src/tipset_syncer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,7 @@ async fn sync_messages_check_state<
)
.await?;
tracker.write().await.set_epoch(current_epoch);
metrics::LAST_VALIDATED_TIPSET_EPOCH.set(current_epoch as u64);
}
None => {
// Full tipset is not in storage; request messages via chain_exchange
Expand Down Expand Up @@ -1090,6 +1091,7 @@ async fn sync_messages_check_state<
.await?;
tracker.write().await.set_epoch(current_epoch);
timer.observe_duration();
metrics::LAST_VALIDATED_TIPSET_EPOCH.set(current_epoch as u64);

// Persist the messages in the store
if let Some(m) = bundle.messages {
Expand Down

0 comments on commit 7783ef8

Please sign in to comment.