From be4711383cb4e093930c44aefeb59a05b272c173 Mon Sep 17 00:00:00 2001 From: Vladimir Petrzhikovskii Date: Mon, 9 Dec 2024 13:49:53 +0100 Subject: [PATCH] feat: add cells cache metrics --- scripts/gen-dashboard.py | 77 +++++++++++++++---- storage/src/store/shard_state/cell_storage.rs | 14 +++- 2 files changed, 75 insertions(+), 16 deletions(-) diff --git a/scripts/gen-dashboard.py b/scripts/gen-dashboard.py index e95f46718..79934f177 100644 --- a/scripts/gen-dashboard.py +++ b/scripts/gen-dashboard.py @@ -759,13 +759,54 @@ def storage() -> RowPanel: create_heatmap_panel( "tycho_storage_load_cell_time", "Time to load cell from storage" ), + create_counter_panel( + expr_sum_rate("tycho_storage_load_cell_time_count"), + "Number of load_cell calls", + UNITS.OPS_PER_SEC, + ), create_heatmap_panel( "tycho_storage_get_cell_from_rocksdb_time", "Time to load cell from RocksDB" ), + create_counter_panel( + expr_sum_rate("tycho_storage_get_cell_from_rocksdb_time_count"), + "Number of cache missed cell loads", + UNITS.OPS_PER_SEC, + ), + timeseries_panel( + title="Storage Cache Hit Rate", + targets=[ + target( + expr=expr_operator( + expr_operator( + "1", + "-", + expr_operator( + expr_sum_rate( + "tycho_storage_get_cell_from_rocksdb_time_count", + ), + "/", + expr_sum_rate( + "tycho_storage_load_cell_time_count", + ), + ), + ), + "*", + "100", + ), + legend_format="Hit Rate", + ) + ], + unit=UNITS.PERCENT_FORMAT, + ), + create_counter_panel( + "tycho_storage_raw_cells_cache_size", + "Raw cells cache size", + UNITS.BYTES_IEC, + ), create_heatmap_quantile_panel( "tycho_storage_store_block_data_size", "Block data size", - UNITS.BYTES, + UNITS.BYTES_IEC, "0.999", ), create_heatmap_quantile_panel( @@ -1631,7 +1672,7 @@ def mempool_point_rates() -> RowPanel: create_counter_panel( "tycho_mempool_msgs_unique_bytes", "Adapter: unique externals size", - unit_format=UNITS.BYTES, + unit_format=UNITS.BYTES_IEC, ), create_counter_panel( "tycho_mempool_msgs_duplicates_count", @@ -1640,7 +1681,7 @@ def mempool_point_rates() -> RowPanel: create_counter_panel( "tycho_mempool_msgs_duplicates_bytes", "Adapter: removed duplicate externals size", - unit_format=UNITS.BYTES, + unit_format=UNITS.BYTES_IEC, ), create_counter_panel( "tycho_mempool_point_payload_count", @@ -1649,7 +1690,7 @@ def mempool_point_rates() -> RowPanel: create_counter_panel( "tycho_mempool_point_payload_bytes", "Engine: points payload size", - unit_format=UNITS.BYTES, + unit_format=UNITS.BYTES_IEC, ), create_counter_panel( "tycho_mempool_evicted_externals_count", @@ -1658,7 +1699,7 @@ def mempool_point_rates() -> RowPanel: create_counter_panel( "tycho_mempool_evicted_externals_size", "Input buffer: evicted externals size", - unit_format=UNITS.BYTES, + unit_format=UNITS.BYTES_IEC, ), ] return create_row("Mempool point rates", metrics) @@ -1983,15 +2024,23 @@ def collator_execution_manager() -> RowPanel: def allocator_stats() -> RowPanel: metrics = [ - create_gauge_panel("jemalloc_allocated_bytes", "Allocated Bytes", UNITS.BYTES), - create_gauge_panel("jemalloc_active_bytes", "Active Bytes", UNITS.BYTES), - create_gauge_panel("jemalloc_metadata_bytes", "Metadata Bytes", UNITS.BYTES), - create_gauge_panel("jemalloc_resident_bytes", "Resident Bytes", UNITS.BYTES), - create_gauge_panel("jemalloc_mapped_bytes", "Mapped Bytes", UNITS.BYTES), - create_gauge_panel("jemalloc_retained_bytes", "Retained Bytes", UNITS.BYTES), - create_gauge_panel("jemalloc_dirty_bytes", "Dirty Bytes", UNITS.BYTES), - create_gauge_panel( - "jemalloc_fragmentation_bytes", "Fragmentation Bytes", UNITS.BYTES + create_gauge_panel( + "jemalloc_allocated_bytes", "Allocated Bytes", UNITS.BYTES_IEC + ), + create_gauge_panel("jemalloc_active_bytes", "Active Bytes", UNITS.BYTES_IEC), + create_gauge_panel( + "jemalloc_metadata_bytes", "Metadata Bytes", UNITS.BYTES_IEC + ), + create_gauge_panel( + "jemalloc_resident_bytes", "Resident Bytes", UNITS.BYTES_IEC + ), + create_gauge_panel("jemalloc_mapped_bytes", "Mapped Bytes", UNITS.BYTES_IEC), + create_gauge_panel( + "jemalloc_retained_bytes", "Retained Bytes", UNITS.BYTES_IEC + ), + create_gauge_panel("jemalloc_dirty_bytes", "Dirty Bytes", UNITS.BYTES_IEC), + create_gauge_panel( + "jemalloc_fragmentation_bytes", "Fragmentation Bytes", UNITS.BYTES_IEC ), ] return create_row("Allocator Stats", metrics) diff --git a/storage/src/store/shard_state/cell_storage.rs b/storage/src/store/shard_state/cell_storage.rs index 2e104e8b2..b85f86356 100644 --- a/storage/src/store/shard_state/cell_storage.rs +++ b/storage/src/store/shard_state/cell_storage.rs @@ -3,14 +3,14 @@ use std::collections::hash_map; use std::mem::{ManuallyDrop, MaybeUninit}; use std::sync::atomic::{AtomicI64, AtomicU8, Ordering}; use std::sync::{Arc, Weak}; -use std::time::Instant; +use std::time::{Duration, Instant}; use anyhow::{Context, Result}; use bumpalo::Bump; use everscale_types::cell::*; use quick_cache::sync::{Cache, DefaultLifecycle}; use triomphe::ThinArc; -use tycho_util::metrics::HistogramGuard; +use tycho_util::metrics::{spawn_metrics_loop, HistogramGuard}; use tycho_util::{FastDashMap, FastHashMap, FastHasherState}; use weedb::rocksdb::WriteBatch; use weedb::{rocksdb, BoundedCfHandle}; @@ -30,6 +30,12 @@ impl CellStorage { let cells_cache = Default::default(); let raw_cells_cache = Arc::new(RawCellsCache::new(cache_size_bytes)); + spawn_metrics_loop( + &raw_cells_cache.clone(), + Duration::from_secs(5), + |c| async move { c.refresh_metrics() }, + ); + Arc::new(Self { db, cells_cache, @@ -1083,4 +1089,8 @@ impl RawCellsCache { v.header.header.store(rc, Ordering::Release); } + + fn refresh_metrics(&self) { + metrics::gauge!("tycho_storage_raw_cells_cache_size").set(self.inner.weight() as f64); + } }