From e12c26beddf0287a10ab626ffa7b7bc9aaffb0ee Mon Sep 17 00:00:00 2001 From: joshua-spacetime Date: Fri, 27 Oct 2023 20:21:14 -0700 Subject: [PATCH] perf(470): Add metrics for transaction cpu and elapsed time (#483) Closes #470. --- .../db/datastore/locking_tx_datastore/mod.rs | 31 ++++++++++++++++++- crates/core/src/db/db_metrics/mod.rs | 10 ++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/crates/core/src/db/datastore/locking_tx_datastore/mod.rs b/crates/core/src/db/datastore/locking_tx_datastore/mod.rs index f2027f67523..01e32de5b43 100644 --- a/crates/core/src/db/datastore/locking_tx_datastore/mod.rs +++ b/crates/core/src/db/datastore/locking_tx_datastore/mod.rs @@ -13,6 +13,7 @@ use std::{ collections::{BTreeMap, BTreeSet, HashMap}, ops::{Deref, RangeBounds}, sync::Arc, + time::{Duration, Instant}, vec, }; @@ -119,6 +120,8 @@ impl<'a> DataRef<'a> { pub struct MutTxId { lock: ArcMutexGuard, + lock_wait_time: Duration, + timer: Instant, } struct CommittedState { @@ -1972,29 +1975,55 @@ impl traits::MutTx for Locking { type MutTxId = MutTxId; fn begin_mut_tx(&self) -> Self::MutTxId { + let timer = Instant::now(); let mut inner = self.inner.lock_arc(); + let lock_wait_time = timer.elapsed(); if inner.tx_state.is_some() { panic!("The previous transaction was not properly rolled back or committed."); } inner.tx_state = Some(TxState::new()); - MutTxId { lock: inner } + MutTxId { + lock: inner, + lock_wait_time, + timer, + } } fn rollback_mut_tx(&self, ctx: &ExecutionContext, mut tx: Self::MutTxId) { + let elapsed_time = tx.timer.elapsed(); + let cpu_time = elapsed_time - tx.lock_wait_time; DB_METRICS .rdb_num_txns_rolledback .with_label_values(&ctx.txn_type(), &ctx.database(), ctx.reducer_name().unwrap_or("")) .inc(); + DB_METRICS + .rdb_txn_cpu_time_ns + .with_label_values(&ctx.txn_type(), &ctx.database(), ctx.reducer_name().unwrap_or("")) + .observe(cpu_time.as_nanos() as f64); + DB_METRICS + .rdb_txn_elapsed_time_ns + .with_label_values(&ctx.txn_type(), &ctx.database(), ctx.reducer_name().unwrap_or("")) + .observe(elapsed_time.as_nanos() as f64); tx.lock.rollback(); } fn commit_mut_tx(&self, ctx: &ExecutionContext, mut tx: Self::MutTxId) -> super::Result> { + let elapsed_time = tx.timer.elapsed(); + let cpu_time = elapsed_time - tx.lock_wait_time; // Note, we record empty transactions in our metrics. // That is, transactions that don't write any rows to the commit log. DB_METRICS .rdb_num_txns_committed .with_label_values(&ctx.txn_type(), &ctx.database(), ctx.reducer_name().unwrap_or("")) .inc(); + DB_METRICS + .rdb_txn_cpu_time_ns + .with_label_values(&ctx.txn_type(), &ctx.database(), ctx.reducer_name().unwrap_or("")) + .observe(cpu_time.as_nanos() as f64); + DB_METRICS + .rdb_txn_elapsed_time_ns + .with_label_values(&ctx.txn_type(), &ctx.database(), ctx.reducer_name().unwrap_or("")) + .observe(elapsed_time.as_nanos() as f64); tx.lock.commit() } diff --git a/crates/core/src/db/db_metrics/mod.rs b/crates/core/src/db/db_metrics/mod.rs index 60e480b8620..69accd9ed27 100644 --- a/crates/core/src/db/db_metrics/mod.rs +++ b/crates/core/src/db/db_metrics/mod.rs @@ -75,6 +75,16 @@ metrics_group!( #[help = "The cumulative number of rolled back transactions"] #[labels(txn_type: TransactionType, db: Address, reducer: str)] pub rdb_num_txns_rolledback: IntCounterVec, + + #[name = spacetime_txn_elapsed_time_ns] + #[help = "The total elapsed (wall) time of a transaction (nanoseconds)"] + #[labels(txn_type: TransactionType, db: Address, reducer: str)] + pub rdb_txn_elapsed_time_ns: HistogramVec, + + #[name = spacetime_txn_cpu_time_ns] + #[help = "The time spent executing a transaction (nanoseconds), excluding time spent waiting to acquire database locks"] + #[labels(txn_type: TransactionType, db: Address, reducer: str)] + pub rdb_txn_cpu_time_ns: HistogramVec, } );