Skip to content

Commit

Permalink
add metrics around success rates (#530)
Browse files Browse the repository at this point in the history
* add metrics around success rates

* add #signature success within 30 s

* remove mine triple generator metric

* update default param

* change testnet default param
  • Loading branch information
ppca authored Mar 27, 2024
1 parent f701d97 commit 50f3421
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 12 deletions.
8 changes: 4 additions & 4 deletions node/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,15 @@ pub enum Cli {
#[arg(long, env("MPC_RECOVERY_MIN_TRIPLES"), default_value("20"))]
min_triples: usize,
/// At maximum, how many triples to stockpile on this node.
#[arg(long, env("MPC_RECOVERY_MAX_TRIPLES"), default_value("560"))]
#[arg(long, env("MPC_RECOVERY_MAX_TRIPLES"), default_value("640"))]
max_triples: usize,

/// At maximum, how many triple protocols can this current node introduce
/// at the same time. This should be something like `max_concurrent_gen / num_nodes`
#[arg(
long,
env("MPC_RECOVERY_MAX_CONCURRENT_INTRODUCTION"),
default_value("4")
default_value("2")
)]
max_concurrent_introduction: usize,

Expand All @@ -78,7 +78,7 @@ pub enum Cli {
#[arg(
long,
env("MPC_RECOVERY_MAX_CONCURRENT_GENERATION"),
default_value("32")
default_value("16")
)]
max_concurrent_generation: usize,

Expand All @@ -87,7 +87,7 @@ pub enum Cli {
min_presignatures: usize,

/// At maximum, how many presignatures to stockpile on the network.
#[arg(long, env("MPC_RECOVERY_MAX_PRESIGNATURES"), default_value("280"))]
#[arg(long, env("MPC_RECOVERY_MAX_PRESIGNATURES"), default_value("320"))]
max_presignatures: usize,
},
}
Expand Down
78 changes: 78 additions & 0 deletions node/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,84 @@ pub(crate) static MPC_CONTRACT_VERSION: Lazy<IntGaugeVec> = Lazy::new(|| {
.unwrap()
});

/// Gauge vector `multichain_num_total_historical_triple_generators`, labelled by
/// `node_account_id`.
///
/// Reports the number of all triple generators historically on the node.
///
/// The metric is built on first access; that access panics if
/// `try_create_int_gauge_vec` returns an error.
// NOTE(review): presumably this value is only ever incremented; if so, a counter
// type may be a better fit than a gauge — confirm against the call sites.
pub(crate) static NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS: Lazy<IntGaugeVec> = Lazy::new(|| {
    let name = "multichain_num_total_historical_triple_generators";
    let help = "number of all triple generators historically on the node";
    try_create_int_gauge_vec(name, help, &["node_account_id"]).unwrap()
});

/// Gauge vector `multichain_num_total_historical_triple_generators_success`,
/// labelled by `node_account_id`.
///
/// Reports the number of all successful triple generators historically on the node.
///
/// The metric is built on first access; that access panics if
/// `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS_SUCCESS: Lazy<IntGaugeVec> =
    Lazy::new(|| {
        let name = "multichain_num_total_historical_triple_generators_success";
        let help = "number of all successful triple generators historically on the node";
        try_create_int_gauge_vec(name, help, &["node_account_id"]).unwrap()
    });

/// Gauge vector `multichain_num_total_historical_triple_generations_mine_success`,
/// labelled by `node_account_id`.
///
/// Reports the number of successful triple generators that were "mine"
/// historically on the node.
///
/// The metric is built on first access; that access panics if
/// `try_create_int_gauge_vec` returns an error.
// NOTE(review): this identifier and metric name say "GENERATIONS" where the sibling
// metrics say "GENERATORS"; renaming would change the exported metric name, so it
// is left as-is here.
pub(crate) static NUM_TOTAL_HISTORICAL_TRIPLE_GENERATIONS_MINE_SUCCESS: Lazy<IntGaugeVec> =
    Lazy::new(|| {
        let name = "multichain_num_total_historical_triple_generations_mine_success";
        let help = "number of successful triple generators that was mine historically on the node";
        try_create_int_gauge_vec(name, help, &["node_account_id"]).unwrap()
    });

/// Gauge vector `multichain_num_total_historical_presignature_generators`,
/// labelled by `node_account_id`.
///
/// Reports the number of all presignature generators historically on the node.
///
/// The metric is built on first access; that access panics if
/// `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS: Lazy<IntGaugeVec> =
    Lazy::new(|| {
        let name = "multichain_num_total_historical_presignature_generators";
        let help = "number of all presignature generators historically on the node";
        try_create_int_gauge_vec(name, help, &["node_account_id"]).unwrap()
    });

/// Gauge vector `multichain_num_total_historical_presignature_generators_success`,
/// labelled by `node_account_id`.
///
/// Reports the number of all successful presignature generators historically on
/// the node.
///
/// The metric is built on first access; that access panics if
/// `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_SUCCESS: Lazy<IntGaugeVec> =
    Lazy::new(|| {
        let name = "multichain_num_total_historical_presignature_generators_success";
        let help = "number of all successful presignature generators historically on the node";
        try_create_int_gauge_vec(name, help, &["node_account_id"]).unwrap()
    });

/// Gauge vector `multichain_num_total_historical_presignature_generators_mine`,
/// labelled by `node_account_id`.
///
/// Reports the number of "mine" presignature generators historically on the node.
///
/// The metric is built on first access; that access panics if
/// `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_MINE: Lazy<IntGaugeVec> =
    Lazy::new(|| {
        let name = "multichain_num_total_historical_presignature_generators_mine";
        let help = "number of mine presignature generators historically on the node";
        try_create_int_gauge_vec(name, help, &["node_account_id"]).unwrap()
    });

/// Gauge vector `multichain_num_total_historical_presignature_generators_mine_success`,
/// labelled by `node_account_id`.
///
/// Reports the number of *successful* "mine" presignature generators historically
/// on the node.
///
/// The metric is built on first access; that access panics if
/// `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_MINE_SUCCESS: Lazy<IntGaugeVec> =
    Lazy::new(|| {
        try_create_int_gauge_vec(
            "multichain_num_total_historical_presignature_generators_mine_success",
            // The help text must say "successful": without it this metric's
            // description is identical to the `_mine` (all attempts) metric's.
            "number of successful mine presignature generators historically on the node",
            &["node_account_id"],
        )
        .unwrap()
    });

/// Gauge vector `multichain_sign_requests_success_30s`, labelled by
/// `node_account_id`.
///
/// Reports the number of successful multichain sign requests that finished within
/// 30 seconds, as marked by `publish()`.
///
/// The metric is built on first access; that access panics if
/// `try_create_int_gauge_vec` returns an error.
pub(crate) static NUM_SIGN_SUCCESS_30S: Lazy<IntGaugeVec> = Lazy::new(|| {
    let name = "multichain_sign_requests_success_30s";
    let help =
        "number of successful multichain sign requests that finished within 30s, marked by publish()";
    try_create_int_gauge_vec(name, help, &["node_account_id"]).unwrap()
});

pub fn try_create_int_gauge_vec(name: &str, help: &str, labels: &[&str]) -> Result<IntGaugeVec> {
check_metric_multichain_prefix(name)?;
let opts = Opts::new(name, help);
Expand Down
10 changes: 7 additions & 3 deletions node/src/protocol/consensus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,11 @@ impl ConsensusProtocol for StartedState {
}
Ordering::Less => Err(ConsensusError::EpochRollback),
Ordering::Equal => {
let account_id = ctx.my_account_id();
let sign_queue = ctx.sign_queue();
match contract_state.participants.find_participant(account_id) {
match contract_state
.participants
.find_participant(&ctx.my_account_id().clone())
{
Some(me) => {
tracing::info!(
"started: contract state is running and we are already a participant"
Expand All @@ -130,7 +132,7 @@ impl ConsensusProtocol for StartedState {
me,
contract_state.threshold,
epoch,
account_id.clone(),
ctx.my_account_id().clone(),
ctx.cfg(),
);
let triple_manager = TripleManager::new(
Expand All @@ -140,6 +142,7 @@ impl ConsensusProtocol for StartedState {
ctx.cfg(),
self.triple_data,
ctx.triple_storage(),
ctx.my_account_id().clone(),
);
Ok(NodeState::Running(RunningState {
epoch,
Expand Down Expand Up @@ -352,6 +355,7 @@ impl ConsensusProtocol for WaitingForConsensusState {
ctx.cfg(),
vec![],
ctx.triple_storage(),
ctx.my_account_id().clone(),
);

Ok(NodeState::Running(RunningState {
Expand Down
16 changes: 15 additions & 1 deletion node/src/protocol/presignature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,12 @@ impl PresignatureManager {
)?;
self.generators.insert(id, generator);
self.introduced.insert(id);
crate::metrics::NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
crate::metrics::NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_MINE
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
Ok(())
}

Expand Down Expand Up @@ -330,6 +336,9 @@ impl PresignatureManager {
false,
)?;
let generator = entry.insert(generator);
crate::metrics::NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
Ok(&mut generator.protocol)
}
Entry::Occupied(entry) => Ok(&mut entry.into_mut().protocol),
Expand Down Expand Up @@ -423,13 +432,18 @@ impl PresignatureManager {
if generator.mine {
tracing::info!(id, "assigning presignature to myself");
self.mine.push_back(*id);
crate::metrics::NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_MINE_SUCCESS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
}
self.introduced.remove(id);

crate::metrics::PRESIGNATURE_LATENCY
.with_label_values(&[&self.my_account_id.as_ref()])
.observe(generator.timestamp.elapsed().as_secs_f64());

crate::metrics::NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_SUCCESS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
// Do not retain the protocol
return false;
}
Expand Down
5 changes: 5 additions & 0 deletions node/src/protocol/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,11 @@ impl SignatureManager {
crate::metrics::SIGN_LATENCY
.with_label_values(&[my_account_id])
.observe(time_added.elapsed().as_secs_f64());
if time_added.elapsed().as_secs() <= 30 {
crate::metrics::NUM_SIGN_SUCCESS_30S
.with_label_values(&[my_account_id])
.inc();
}
tracing::info!(%receipt_id, big_r = signature.big_r.to_base58(), s = ?signature.s, status = ?response.status, "published signature response");
}
Ok(())
Expand Down
22 changes: 18 additions & 4 deletions node/src/protocol/triple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use cait_sith::triples::{TripleGenerationOutput, TriplePub, TripleShare};
use highway::{HighwayHash, HighwayHasher};
use k256::elliptic_curve::group::GroupEncoding;
use k256::Secp256k1;
use near_lake_primitives::AccountId;
use serde::{Deserialize, Serialize};
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet, VecDeque};
Expand Down Expand Up @@ -117,6 +118,7 @@ pub struct TripleManager {
pub triple_storage: LockTripleNodeStorageBox,
/// triple generation protocols that failed.
pub failed_triples: HashMap<TripleId, Instant>,
pub my_account_id: AccountId,
}

impl TripleManager {
Expand All @@ -127,6 +129,7 @@ impl TripleManager {
cfg: Config,
triple_data: Vec<TripleData>,
triple_storage: LockTripleNodeStorageBox,
my_account_id: AccountId,
) -> Self {
let mut mine: VecDeque<TripleId> = VecDeque::new();
let mut all_triples = HashMap::new();
Expand All @@ -152,6 +155,7 @@ impl TripleManager {
triple_cfg: cfg.triple_cfg,
triple_storage,
failed_triples: HashMap::new(),
my_account_id,
}
}

Expand Down Expand Up @@ -201,6 +205,9 @@ impl TripleManager {
.insert(id, TripleGenerator::new(id, participants, protocol));
self.queued.push_back(id);
self.introduced.insert(id);
crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
Ok(())
}

Expand Down Expand Up @@ -357,6 +364,9 @@ impl TripleManager {
)?);
let generator = e.insert(TripleGenerator::new(id, participants, protocol));
self.queued.push_back(id);
crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
Ok(Some(&mut generator.protocol))
}
Entry::Occupied(e) => Ok(Some(&mut e.into_mut().protocol)),
Expand All @@ -380,9 +390,6 @@ impl TripleManager {
let mut messages = Vec::new();
let mut result = Ok(());
let mut triples_to_insert = Vec::new();
let triple_storage_read_lock = self.triple_storage.read().await;
let my_account_id = triple_storage_read_lock.account_id();
drop(triple_storage_read_lock);
self.generators.retain(|id, generator| {
if !self.ongoing.contains(id) {
// If the protocol is not ongoing, we should retain it for the next time
Expand Down Expand Up @@ -447,10 +454,14 @@ impl TripleManager {

if let Some(start_time) = generator.timestamp {
crate::metrics::TRIPLE_LATENCY
.with_label_values(&[&my_account_id])
.with_label_values(&[&self.my_account_id.as_ref()])
.observe(start_time.elapsed().as_secs_f64());
}

crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS_SUCCESS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();

let triple = Triple {
id: *id,
share: output.0,
Expand Down Expand Up @@ -478,6 +489,9 @@ impl TripleManager {

if triple_is_mine {
self.mine.push_back(*id);
crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATIONS_MINE_SUCCESS
.with_label_values(&[&self.my_account_id.as_ref()])
.inc();
}

self.triples.insert(*id, triple.clone());
Expand Down
1 change: 1 addition & 0 deletions node/src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ impl TestTripleManagers {
DEFAULT_TEST_CONFIG,
vec![],
triple_storage,
num.to_string().parse().unwrap(),
)
})
.collect();
Expand Down

0 comments on commit 50f3421

Please sign in to comment.