Skip to content

Commit

Permalink
Merge pull request #804 from near/xiangyi/failure_metrics
Browse files Browse the repository at this point in the history
metrics: add failure counter for triple/presig/signature
  • Loading branch information
ppca authored Aug 12, 2024
2 parents 48a572b + b025104 commit 568fb11
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 11 deletions.
45 changes: 45 additions & 0 deletions chain-signatures/node/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,51 @@ pub(crate) static FAILED_SEND_ENCRYPTED_LATENCY: Lazy<HistogramVec> = Lazy::new(
.unwrap()
});

/// Per-node gauge tracking how many signature generators have been created.
///
/// Registered under the single label `node_account_id`. Call sites bump it
/// with `.inc()` right where a generator is inserted into the signature
/// manager's generator map.
///
/// # Panics
///
/// The first access panics if `try_create_int_gauge_vec` returns an error
/// (it validates the metric name prefix before building the metric).
pub(crate) static NUM_TOTAL_HISTORICAL_SIGNATURE_GENERATORS: Lazy<IntGaugeVec> = Lazy::new(|| {
    const NAME: &str = "multichain_num_total_historical_signature_generators";
    const HELP: &str = "number of all signature generators historically on the node";
    try_create_int_gauge_vec(NAME, HELP, &["node_account_id"]).unwrap()
});

/// Per-node tally of triple generator failures.
///
/// Registered under the single label `node_account_id`. The triple manager
/// calls `.inc()` on it in the error branch taken when a generator fails.
///
/// NOTE(review): only `.inc()` calls are visible for this metric; if it is
/// never decremented or set, a counter type may be a better fit than a
/// gauge — confirm before changing.
///
/// # Panics
///
/// The first access panics if `try_create_int_gauge_vec` returns an error
/// (it validates the metric name prefix before building the metric).
pub(crate) static TRIPLE_GENERATOR_FAILURES: Lazy<IntGaugeVec> = Lazy::new(|| {
    const NAME: &str = "multichain_triple_generator_failures";
    const HELP: &str = "total triple generator failures";
    try_create_int_gauge_vec(NAME, HELP, &["node_account_id"]).unwrap()
});

/// Per-node tally of signature generator failures that are retried.
///
/// Registered under the single label `node_account_id`. The signature
/// manager calls `.inc()` on it when a generator this node proposed fails
/// while the request is still within its total timeout, i.e. just before the
/// request is pushed back onto the failed queue.
///
/// NOTE(review): only `.inc()` calls are visible for this metric; if it is
/// never decremented or set, a counter type may be a better fit than a
/// gauge — confirm before changing.
///
/// # Panics
///
/// The first access panics if `try_create_int_gauge_vec` returns an error
/// (it validates the metric name prefix before building the metric).
pub(crate) static SIGNATURE_GENERATOR_FAILURES: Lazy<IntGaugeVec> = Lazy::new(|| {
    const NAME: &str = "multichain_signature_generator_failures";
    const HELP: &str = "total signature generator failures";
    try_create_int_gauge_vec(NAME, HELP, &["node_account_id"]).unwrap()
});

/// Per-node tally of presignature generator failures.
///
/// Registered under the single label `node_account_id`. The presignature
/// manager calls `.inc()` on it when `generator.poke()` returns an error.
///
/// NOTE(review): only `.inc()` calls are visible for this metric; if it is
/// never decremented or set, a counter type may be a better fit than a
/// gauge — confirm before changing.
///
/// # Panics
///
/// The first access panics if `try_create_int_gauge_vec` returns an error
/// (it validates the metric name prefix before building the metric).
pub(crate) static PRESIGNATURE_GENERATOR_FAILURES: Lazy<IntGaugeVec> = Lazy::new(|| {
    const NAME: &str = "multichain_presignature_generator_failures";
    const HELP: &str = "total presignature generator failures";
    try_create_int_gauge_vec(NAME, HELP, &["node_account_id"]).unwrap()
});

/// Per-node tally of signature requests that were given up on.
///
/// Registered under the single label `node_account_id`. The signature
/// manager calls `.inc()` on it in the branch that marks a failed request as
/// completed and logs that it is being trashed, rather than queueing it for
/// another attempt.
///
/// NOTE(review): only `.inc()` calls are visible for this metric; if it is
/// never decremented or set, a counter type may be a better fit than a
/// gauge — confirm before changing.
///
/// # Panics
///
/// The first access panics if `try_create_int_gauge_vec` returns an error
/// (it validates the metric name prefix before building the metric).
pub(crate) static SIGNATURE_FAILURES: Lazy<IntGaugeVec> = Lazy::new(|| {
    const NAME: &str = "multichain_signature_failures";
    const HELP: &str = "total signature failures";
    try_create_int_gauge_vec(NAME, HELP, &["node_account_id"]).unwrap()
});

pub fn try_create_int_gauge_vec(name: &str, help: &str, labels: &[&str]) -> Result<IntGaugeVec> {
check_metric_multichain_prefix(name)?;
let opts = Opts::new(name, help);
Expand Down
2 changes: 2 additions & 0 deletions chain-signatures/node/src/protocol/consensus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ impl ConsensusProtocol for StartedState {
me,
contract_state.public_key,
epoch,
ctx.my_account_id(),
),
)),
messages: Default::default(),
Expand Down Expand Up @@ -391,6 +392,7 @@ impl ConsensusProtocol for WaitingForConsensusState {
me,
self.public_key,
self.epoch,
ctx.my_account_id(),
))),
messages: self.messages,
}))
Expand Down
7 changes: 1 addition & 6 deletions chain-signatures/node/src/protocol/cryptography.rs
Original file line number Diff line number Diff line change
Expand Up @@ -457,12 +457,7 @@ impl CryptographicProtocol for RunningState {
messages.push(info.clone(), MpcMessage::Signature(msg));
}
signature_manager
.publish(
ctx.rpc_client(),
ctx.signer(),
ctx.mpc_contract_id(),
&my_account_id,
)
.publish(ctx.rpc_client(), ctx.signer(), ctx.mpc_contract_id())
.await;
drop(signature_manager);
let failures = messages
Expand Down
3 changes: 3 additions & 0 deletions chain-signatures/node/src/protocol/presignature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,9 @@ impl PresignatureManager {
let action = match generator.poke() {
Ok(action) => action,
Err(e) => {
crate::metrics::PRESIGNATURE_GENERATOR_FAILURES
.with_label_values(&[self.my_account_id.as_str()])
.inc();
self.gc.insert(*id, Instant::now());
self.introduced.remove(id);
errors.push(e);
Expand Down
31 changes: 26 additions & 5 deletions chain-signatures/node/src/protocol/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ pub struct SignatureManager {
me: Participant,
public_key: PublicKey,
epoch: u64,
my_account_id: AccountId,
}

/// Retry limit, going by the name; the code that reads this constant is not
/// visible in this view.
/// NOTE(review): presumably bounds publish retries, since it sits next to
/// `ToPublish` — confirm against the call site.
pub const MAX_RETRY: u8 = 10;
Expand Down Expand Up @@ -274,7 +275,12 @@ impl ToPublish {
}

impl SignatureManager {
pub fn new(me: Participant, public_key: PublicKey, epoch: u64) -> Self {
pub fn new(
me: Participant,
public_key: PublicKey,
epoch: u64,
my_account_id: &AccountId,
) -> Self {
Self {
generators: HashMap::new(),
failed: VecDeque::new(),
Expand All @@ -283,6 +289,7 @@ impl SignatureManager {
me,
public_key,
epoch,
my_account_id: my_account_id.clone(),
}
}

Expand Down Expand Up @@ -364,6 +371,9 @@ impl SignatureManager {
req,
cfg,
)?;
crate::metrics::NUM_TOTAL_HISTORICAL_SIGNATURE_GENERATORS
.with_label_values(&[self.my_account_id.as_str()])
.inc();
self.generators.insert(receipt_id, generator);
Ok(())
}
Expand Down Expand Up @@ -404,6 +414,9 @@ impl SignatureManager {
},
cfg,
)?;
crate::metrics::NUM_TOTAL_HISTORICAL_SIGNATURE_GENERATORS
.with_label_values(&[self.my_account_id.as_str()])
.inc();
self.generators.insert(receipt_id, generator);
Ok(())
}
Expand Down Expand Up @@ -474,6 +487,9 @@ impl SignatureManager {
}
};
let generator = entry.insert(generator);
crate::metrics::NUM_TOTAL_HISTORICAL_SIGNATURE_GENERATORS
.with_label_values(&[self.my_account_id.as_str()])
.inc();
Ok(&mut generator.protocol)
}
Entry::Occupied(entry) => Ok(&mut entry.into_mut().protocol),
Expand All @@ -494,6 +510,9 @@ impl SignatureManager {
if generator.proposer == self.me {
if generator.sign_request_timestamp.elapsed() < generator.timeout_total {
tracing::warn!(?err, "signature failed to be produced; pushing request back into failed queue");
crate::metrics::SIGNATURE_GENERATOR_FAILURES
.with_label_values(&[self.my_account_id.as_str()])
.inc();
// only retry the signature generation if it was initially proposed by us. We do not
// want any nodes to be proposing the same signature multiple times.
self.failed.push_back((
Expand All @@ -509,6 +528,9 @@ impl SignatureManager {
));
} else {
self.completed.insert(*receipt_id, Instant::now());
crate::metrics::SIGNATURE_FAILURES
.with_label_values(&[self.my_account_id.as_str()])
.inc();
tracing::warn!(?err, "signature failed to be produced; trashing request");
}
}
Expand Down Expand Up @@ -676,7 +698,6 @@ impl SignatureManager {
rpc_client: &near_fetch::Client,
signer: &T,
mpc_contract_id: &AccountId,
my_account_id: &AccountId,
) {
let mut to_retry: Vec<ToPublish> = Vec::new();

Expand Down Expand Up @@ -733,14 +754,14 @@ impl SignatureManager {
};

crate::metrics::NUM_SIGN_SUCCESS
.with_label_values(&[my_account_id.as_str()])
.with_label_values(&[self.my_account_id.as_str()])
.inc();
crate::metrics::SIGN_LATENCY
.with_label_values(&[my_account_id.as_str()])
.with_label_values(&[self.my_account_id.as_str()])
.observe(time_added.elapsed().as_secs_f64());
if time_added.elapsed().as_secs() <= 30 {
crate::metrics::NUM_SIGN_SUCCESS_30S
.with_label_values(&[my_account_id.as_str()])
.with_label_values(&[self.my_account_id.as_str()])
.inc();
}
}
Expand Down
3 changes: 3 additions & 0 deletions chain-signatures/node/src/protocol/triple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,9 @@ impl TripleManager {
Ok(action) => action,
Err(e) => {
errors.push(e);
crate::metrics::TRIPLE_GENERATOR_FAILURES
.with_label_values(&[self.my_account_id.as_str()])
.inc();
self.gc.insert(*id, Instant::now());
self.ongoing.remove(id);
self.introduced.remove(id);
Expand Down

0 comments on commit 568fb11

Please sign in to comment.