diff --git a/core/store/src/metrics.rs b/core/store/src/metrics.rs index e677530af21..e81321774e3 100644 --- a/core/store/src/metrics.rs +++ b/core/store/src/metrics.rs @@ -1,4 +1,7 @@ -use near_metrics::{try_create_histogram_vec, HistogramVec}; +use near_metrics::{ + try_create_histogram_vec, try_create_int_counter_vec, try_create_int_gauge_vec, HistogramVec, + IntCounterVec, IntGaugeVec, +}; use once_cell::sync::Lazy; pub(crate) static DATABASE_OP_LATENCY_HIST: Lazy = Lazy::new(|| { @@ -10,3 +13,62 @@ pub(crate) static DATABASE_OP_LATENCY_HIST: Lazy = Lazy::new(|| { ) .unwrap() }); + +pub static CHUNK_CACHE_HITS: Lazy = Lazy::new(|| { + try_create_int_counter_vec( + "near_chunk_cache_hits", + "Chunk cache hits", + &["shard_id", "is_view"], + ) + .unwrap() +}); + +pub static CHUNK_CACHE_MISSES: Lazy = Lazy::new(|| { + try_create_int_counter_vec( + "near_chunk_cache_misses", + "Chunk cache misses", + &["shard_id", "is_view"], + ) + .unwrap() +}); + +pub static SHARD_CACHE_HITS: Lazy = Lazy::new(|| { + try_create_int_counter_vec( + "near_shard_cache_hits", + "Shard cache hits", + &["shard_id", "is_view"], + ) + .unwrap() +}); + +pub static SHARD_CACHE_MISSES: Lazy = Lazy::new(|| { + try_create_int_counter_vec( + "near_shard_cache_misses", + "Shard cache misses", + &["shard_id", "is_view"], + ) + .unwrap() +}); + +pub static SHARD_CACHE_TOO_LARGE: Lazy = Lazy::new(|| { + try_create_int_counter_vec( + "near_shard_cache_too_large", + "Number of values to be inserted into shard cache is too large", + &["shard_id", "is_view"], + ) + .unwrap() +}); + +pub static SHARD_CACHE_SIZE: Lazy = Lazy::new(|| { + try_create_int_gauge_vec("near_shard_cache_size", "Shard cache size", &["shard_id", "is_view"]) + .unwrap() +}); + +pub static CHUNK_CACHE_SIZE: Lazy = Lazy::new(|| { + try_create_int_gauge_vec("near_chunk_cache_size", "Chunk cache size", &["shard_id", "is_view"]) + .unwrap() +}); + +pub static SHARD_CACHE_POPS: Lazy = Lazy::new(|| { + try_create_int_counter_vec("near_shard_cache_pops", "Shard cache pops", &["shard_id"]).unwrap() +}); diff --git a/core/store/src/trie/shard_tries.rs b/core/store/src/trie/shard_tries.rs index 25ec14beeef..24f656665bd 100644 --- a/core/store/src/trie/shard_tries.rs +++ b/core/store/src/trie/shard_tries.rs @@ -115,7 +115,8 @@ impl ShardTries { .or_insert_with(|| self.0.trie_cache_factory.create_cache(&shard_uid)) .clone() }; - let storage = Box::new(TrieCachingStorage::new(self.0.store.clone(), cache, shard_uid)); + let storage = + Box::new(TrieCachingStorage::new(self.0.store.clone(), cache, shard_uid, is_view)); let flat_state = { #[cfg(feature = "protocol_feature_flat_state")] if use_flat_state { @@ -179,7 +180,7 @@ impl ShardTries { .entry(shard_uid) .or_insert_with(|| self.0.trie_cache_factory.create_cache(&shard_uid)) .clone(); - cache.update_cache(ops); + cache.update_cache(ops, shard_uid); } Ok(()) } diff --git a/core/store/src/trie/trie_storage.rs b/core/store/src/trie/trie_storage.rs index 04069072cbc..74a4f43b96c 100644 --- a/core/store/src/trie/trie_storage.rs +++ b/core/store/src/trie/trie_storage.rs @@ -6,7 +6,7 @@ use near_primitives::hash::CryptoHash; use crate::db::refcount::decode_value_with_rc; use crate::trie::POISONED_LOCK_ERR; -use crate::{DBCol, StorageError, Store}; +use crate::{metrics, DBCol, StorageError, Store}; use lru::LruCache; use near_primitives::shard_layout::ShardUId; use near_primitives::types::{TrieCacheMode, TrieNodesCount}; @@ -34,19 +34,34 @@ impl TrieCache { self.0.lock().expect(POISONED_LOCK_ERR).clear() } - pub fn update_cache(&self, ops: Vec<(CryptoHash, Option<&Vec>)>) { + pub fn update_cache(&self, ops: Vec<(CryptoHash, Option<&Vec>)>, shard_uid: ShardUId) { + let shard_id_str = format!("{}", shard_uid.shard_id); + let labels: [&str; 1] = [&shard_id_str]; + let mut guard = self.0.lock().expect(POISONED_LOCK_ERR); for (hash, opt_value_rc) in ops { if let Some(value_rc) = opt_value_rc { if let (Some(value), _rc) = decode_value_with_rc(&value_rc) { if value.len() < TRIE_LIMIT_CACHED_VALUE_SIZE { guard.put(hash, value.into()); + } else { + metrics::SHARD_CACHE_TOO_LARGE.with_label_values(&labels).inc(); } } else { - guard.pop(&hash); + match guard.pop(&hash) { + Some(_) => { + metrics::SHARD_CACHE_POPS.with_label_values(&labels).inc(); + } + _ => {} + }; } } else { - guard.pop(&hash); + match guard.pop(&hash) { + Some(_) => { + metrics::SHARD_CACHE_POPS.with_label_values(&labels).inc(); + } + _ => {} + }; } } } @@ -179,10 +194,17 @@ pub struct TrieCachingStorage { pub(crate) db_read_nodes: Cell, /// Counts trie nodes retrieved from the chunk cache. pub(crate) mem_read_nodes: Cell, + /// Boolean for determining if the cache is a view cache or not + is_view: bool, } impl TrieCachingStorage { - pub fn new(store: Store, shard_cache: TrieCache, shard_uid: ShardUId) -> TrieCachingStorage { + pub fn new( + store: Store, + shard_cache: TrieCache, + shard_uid: ShardUId, + is_view: bool, + ) -> TrieCachingStorage { TrieCachingStorage { store, shard_uid, @@ -191,6 +213,7 @@ impl TrieCachingStorage { chunk_cache: RefCell::new(Default::default()), db_read_nodes: Cell::new(0), mem_read_nodes: Cell::new(0), + is_view, } } @@ -227,25 +250,45 @@ impl TrieCachingStorage { pub fn set_mode(&self, state: TrieCacheMode) { self.cache_mode.set(state); } + + fn update_cache_size_metrics(&self, labels: [&str; 2]) { + { + metrics::CHUNK_CACHE_SIZE + .with_label_values(&labels) + .set(self.chunk_cache.borrow().len() as i64); + metrics::SHARD_CACHE_SIZE + .with_label_values(&labels) + .set(self.shard_cache.0.lock().expect(POISONED_LOCK_ERR).len() as i64); + } + } } impl TrieStorage for TrieCachingStorage { fn retrieve_raw_bytes(&self, hash: &CryptoHash) -> Result, StorageError> { + let shard_id_str = format!("{}", self.shard_uid.shard_id); + let is_view_str = format!("{}", self.is_view as u8); + let labels: [&str; 2] = [&shard_id_str, &is_view_str]; + + self.update_cache_size_metrics(labels); // Try to get value from chunk cache containing nodes with cheaper access. We can do it for any `TrieCacheMode`, // because we charge for reading nodes only when `CachingChunk` mode is enabled anyway. if let Some(val) = self.chunk_cache.borrow_mut().get(hash) { + metrics::CHUNK_CACHE_HITS.with_label_values(&labels).inc(); self.inc_mem_read_nodes(); return Ok(val.clone()); } + metrics::CHUNK_CACHE_MISSES.with_label_values(&labels).inc(); // Try to get value from shard cache containing most recently touched nodes. let mut guard = self.shard_cache.0.lock().expect(POISONED_LOCK_ERR); let val = match guard.get(hash) { Some(val) => { + metrics::SHARD_CACHE_HITS.with_label_values(&labels).inc(); near_o11y::io_trace!(count: "shard_cache_hit"); val.clone() } None => { + metrics::SHARD_CACHE_MISSES.with_label_values(&labels).inc(); near_o11y::io_trace!(count: "shard_cache_miss"); // If value is not present in cache, get it from the storage. let key = Self::get_key_from_shard_uid_and_hash(self.shard_uid, hash); @@ -265,6 +308,7 @@ impl TrieStorage for TrieCachingStorage { if val.len() < TRIE_LIMIT_CACHED_VALUE_SIZE { guard.put(*hash, val.clone()); } else { + metrics::SHARD_CACHE_TOO_LARGE.with_label_values(&labels).inc(); near_o11y::io_trace!(count: "shard_cache_too_large"); } diff --git a/core/store/src/trie/trie_tests.rs b/core/store/src/trie/trie_tests.rs index 9abe817a0c8..03f9782b348 100644 --- a/core/store/src/trie/trie_tests.rs +++ b/core/store/src/trie/trie_tests.rs @@ -235,7 +235,8 @@ mod caching_storage_tests { let shard_uid = ShardUId::single_shard(); let store = create_store_with_values(&values, shard_uid); let trie_cache = TrieCache::new(); - let trie_caching_storage = TrieCachingStorage::new(store, trie_cache.clone(), shard_uid); + let trie_caching_storage = + TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false); let key = hash(&value); assert_eq!(trie_cache.get(&key), None); @@ -255,7 +256,8 @@ mod caching_storage_tests { fn test_retrieve_error() { let shard_uid = ShardUId::single_shard(); let store = create_test_store(); - let trie_caching_storage = TrieCachingStorage::new(store, TrieCache::new(), shard_uid); + let trie_caching_storage = + TrieCachingStorage::new(store, TrieCache::new(), shard_uid, false); let value = vec![1u8]; let key = hash(&value); @@ -271,7 +273,8 @@ mod caching_storage_tests { let shard_uid = ShardUId::single_shard(); let store = create_store_with_values(&values, shard_uid); let trie_cache = TrieCache::new(); - let trie_caching_storage = TrieCachingStorage::new(store, trie_cache.clone(), shard_uid); + let trie_caching_storage = + TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false); let key = hash(&value); trie_caching_storage.set_mode(TrieCacheMode::CachingChunk); @@ -293,7 +296,8 @@ mod caching_storage_tests { let shard_uid = ShardUId::single_shard(); let store = create_store_with_values(&values, shard_uid); let trie_cache = TrieCache::new(); - let trie_caching_storage = TrieCachingStorage::new(store, trie_cache.clone(), shard_uid); + let trie_caching_storage = + TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false); let value = &values[0]; let key = hash(&value); @@ -341,7 +345,8 @@ mod caching_storage_tests { let shard_uid = ShardUId::single_shard(); let store = create_store_with_values(&values, shard_uid); let trie_cache = TrieCache::with_capacity(shard_cache_size); - let trie_caching_storage = TrieCachingStorage::new(store, trie_cache.clone(), shard_uid); + let trie_caching_storage = + TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false); let value = &values[0]; let key = hash(&value); diff --git a/runtime/runtime-params-estimator/src/estimator_context.rs b/runtime/runtime-params-estimator/src/estimator_context.rs index da06897003b..bf6d509aa21 100644 --- a/runtime/runtime-params-estimator/src/estimator_context.rs +++ b/runtime/runtime-params-estimator/src/estimator_context.rs @@ -124,7 +124,7 @@ impl<'c> Testbed<'c> { pub(crate) fn trie_caching_storage(&mut self) -> TrieCachingStorage { let store = self.inner.store(); let caching_storage = - TrieCachingStorage::new(store, TrieCache::new(), ShardUId::single_shard()); + TrieCachingStorage::new(store, TrieCache::new(), ShardUId::single_shard(), false); caching_storage }