diff --git a/Cargo.lock b/Cargo.lock index 1bc3c6130b..fab8a30983 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4349,6 +4349,7 @@ dependencies = [ "spacetimedb-vm", "sqlparser", "strum", + "sys-info", "tempfile", "thiserror", "tokio", @@ -4747,6 +4748,16 @@ dependencies = [ "yaml-rust", ] +[[package]] +name = "sys-info" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "system-configuration" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index aad85a4913..25287681d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -157,6 +157,7 @@ sqllogictest = "0.17" strum = { version = "0.25.0", features = ["derive"] } syn = { version = "2", features = ["full", "extra-traits"] } syntect = { version = "5.0.0", default-features = false, features = ["default-fancy"] } +sys-info = "0.9.1" tabled = "0.14.0" tar = "0.4" tempdir = "0.3.7" diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 6213d71f84..b211bbb7b2 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -71,6 +71,7 @@ slab.workspace = true sled.workspace = true sqlparser.workspace = true strum.workspace = true +sys-info.workspace = true tempfile.workspace = true thiserror.workspace = true tokio-util.workspace = true diff --git a/crates/core/src/database_instance_context.rs b/crates/core/src/database_instance_context.rs index 0d78031a43..6c1f3896f1 100644 --- a/crates/core/src/database_instance_context.rs +++ b/crates/core/src/database_instance_context.rs @@ -6,6 +6,7 @@ use crate::db::ostorage::sled_object_db::SledObjectDB; use crate::db::ostorage::ObjectDB; use crate::db::relational_db::RelationalDB; use crate::db::{Config, FsyncPolicy, Storage}; +use crate::error::DBError; use crate::identity::Identity; use crate::messages::control_db::Database; use std::path::{Path, PathBuf}; @@ -93,4 +94,19 @@ impl DatabaseInstanceContext { pub(crate) fn make_default_ostorage(path: impl AsRef) -> Box { Box::new(SledObjectDB::open(path).unwrap()) } + + /// The number of bytes on disk occupied by the [MessageLog]. + pub fn message_log_size_on_disk(&self) -> Result { + self.relational_db.commit_log().message_log_size_on_disk() + } + + /// The number of bytes on disk occupied by the [ObjectDB]. + pub fn object_db_size_on_disk(&self) -> Result { + self.relational_db.commit_log().object_db_size_on_disk() + } + + /// The size of the log file. + pub fn log_file_size(&self) -> Result { + self.logger.size() + } } diff --git a/crates/core/src/database_instance_context_controller.rs b/crates/core/src/database_instance_context_controller.rs index c60b9ca052..9d726a1f49 100644 --- a/crates/core/src/database_instance_context_controller.rs +++ b/crates/core/src/database_instance_context_controller.rs @@ -1,7 +1,9 @@ use std::sync::Arc; use std::{collections::HashMap, sync::Mutex}; +use crate::db::db_metrics::DB_METRICS; use crate::host::scheduler::Scheduler; +use crate::worker_metrics::WORKER_METRICS; use super::database_instance_context::DatabaseInstanceContext; @@ -33,4 +35,41 @@ impl DatabaseInstanceContextController { let mut contexts = self.contexts.lock().unwrap(); contexts.remove(&database_instance_id) } + + #[tracing::instrument(skip_all)] + pub fn update_metrics(&self) { + // Update global disk usage metrics + if let Ok(info) = sys_info::disk_info() { + WORKER_METRICS.system_disk_space_free.set(info.free as i64); + WORKER_METRICS.system_disk_space_total.set(info.total as i64); + } + // Update memory usage metrics + if let Ok(info) = sys_info::mem_info() { + WORKER_METRICS.system_memory_free.set(info.free as i64); + WORKER_METRICS.system_memory_total.set(info.total as i64); + } + for (db, _) in self.contexts.lock().unwrap().values() { + // Use the previous gauge value if there is an issue getting the file size. + if let Ok(num_bytes) = db.message_log_size_on_disk() { + DB_METRICS + .message_log_size + .with_label_values(&db.address) + .set(num_bytes as i64); + } + // Use the previous gauge value if there is an issue getting the file size. + if let Ok(num_bytes) = db.object_db_size_on_disk() { + DB_METRICS + .object_db_disk_usage + .with_label_values(&db.address) + .set(num_bytes as i64); + } + // Use the previous gauge value if there is an issue getting the file size. + if let Ok(num_bytes) = db.log_file_size() { + DB_METRICS + .module_log_file_size + .with_label_values(&db.address) + .set(num_bytes as i64); + } + } + } } diff --git a/crates/core/src/database_logger.rs b/crates/core/src/database_logger.rs index a2732a6c98..c2ac8cd1c1 100644 --- a/crates/core/src/database_logger.rs +++ b/crates/core/src/database_logger.rs @@ -1,4 +1,5 @@ use crate::address::Address; +use crate::error::DBError; use std::fs::OpenOptions; use std::fs::{self, File}; use std::io::{prelude::*, SeekFrom}; @@ -142,6 +143,10 @@ impl DatabaseLogger { Self { file, tx } } + pub fn size(&self) -> Result { + Ok(self.file.metadata()?.len()) + } + pub fn _delete(&mut self) { self.file.set_len(0).unwrap(); self.file.seek(SeekFrom::End(0)).unwrap(); diff --git a/crates/core/src/db/commit_log.rs b/crates/core/src/db/commit_log.rs index 57e6d80548..a8614ee1ce 100644 --- a/crates/core/src/db/commit_log.rs +++ b/crates/core/src/db/commit_log.rs @@ -188,6 +188,22 @@ pub struct CommitLogView { } impl CommitLogView { + /// The number of bytes on disk occupied by the [MessageLog]. + pub fn message_log_size_on_disk(&self) -> Result { + if let Some(ref mlog) = self.mlog { + let guard = mlog.lock().unwrap(); + Ok(guard.size()) + } else { + Ok(0) + } + } + + /// The number of bytes on disk occupied by the [ObjectDB]. + pub fn object_db_size_on_disk(&self) -> Result { + let guard = self.odb.lock().unwrap(); + guard.size_on_disk() + } + /// Obtain an iterator over a snapshot of the raw message log segments. /// /// See also: [`MessageLog::segments`] diff --git a/crates/core/src/db/db_metrics/mod.rs b/crates/core/src/db/db_metrics/mod.rs index 2ec94f1eee..15951a2973 100644 --- a/crates/core/src/db/db_metrics/mod.rs +++ b/crates/core/src/db/db_metrics/mod.rs @@ -105,6 +105,21 @@ metrics_group!( #[help = "The total duration of a spacetime wasm abi call (in seconds); includes row serialization and copying into wasm memory"] #[labels(txn_type: TransactionType, db: Address, reducer: str, call: AbiCall)] pub wasm_abi_call_duration_sec: HistogramVec, + + #[name = spacetime_message_log_size_bytes] + #[help = "For a given database, the number of bytes occupied by its message log"] + #[labels(db: Address)] + pub message_log_size: IntGaugeVec, + + #[name = spacetime_object_db_disk_usage] + #[help = "For a given database, the number of bytes occupied by large object storage"] + #[labels(db: Address)] + pub object_db_disk_usage: IntGaugeVec, + + #[name = spacetime_module_log_file_size_bytes] + #[help = "For a given module, the size of its log file (in bytes)"] + #[labels(db: Address)] + pub module_log_file_size: IntGaugeVec, } ); diff --git a/crates/core/src/db/ostorage/hashmap_object_db.rs b/crates/core/src/db/ostorage/hashmap_object_db.rs index f7a0c4f662..fddda14f0e 100644 --- a/crates/core/src/db/ostorage/hashmap_object_db.rs +++ b/crates/core/src/db/ostorage/hashmap_object_db.rs @@ -186,6 +186,10 @@ impl ObjectDB for HashMapObjectDB { } Ok(()) } + + fn size_on_disk(&self) -> Result { + Ok(self.total_mem_size_bytes()) + } } fn hex_prefixes() -> Vec { diff --git a/crates/core/src/db/ostorage/memory_object_db.rs b/crates/core/src/db/ostorage/memory_object_db.rs index 4975a571d3..162aa81575 100644 --- a/crates/core/src/db/ostorage/memory_object_db.rs +++ b/crates/core/src/db/ostorage/memory_object_db.rs @@ -31,4 +31,8 @@ impl ObjectDB for MemoryObjectDB { fn sync_all(&mut self) -> Result<(), crate::error::DBError> { Ok(()) } + + fn size_on_disk(&self) -> Result { + Ok(0) + } } diff --git a/crates/core/src/db/ostorage/mod.rs b/crates/core/src/db/ostorage/mod.rs index 917a42b813..9e25475552 100644 --- a/crates/core/src/db/ostorage/mod.rs +++ b/crates/core/src/db/ostorage/mod.rs @@ -20,4 +20,5 @@ pub trait ObjectDB { fn get(&self, hash: Hash) -> Option; fn flush(&mut self) -> Result<(), DBError>; fn sync_all(&mut self) -> Result<(), DBError>; + fn size_on_disk(&self) -> Result; } diff --git a/crates/core/src/db/ostorage/rocks_object_db.rs b/crates/core/src/db/ostorage/rocks_object_db.rs index fd08c4eeb3..4435a187be 100644 --- a/crates/core/src/db/ostorage/rocks_object_db.rs +++ b/crates/core/src/db/ostorage/rocks_object_db.rs @@ -66,6 +66,11 @@ impl ObjectDB for RocksDBObjectDB { fn sync_all(&mut self) -> Result<(), DBError> { self.flush() } + + fn size_on_disk(&self) -> Result { + // TODO: Compute the size of the rocksdb instance + Ok(0) + } } #[cfg(test)] diff --git a/crates/core/src/db/ostorage/sled_object_db.rs b/crates/core/src/db/ostorage/sled_object_db.rs index 6df41ee99d..45ed2a5bc4 100644 --- a/crates/core/src/db/ostorage/sled_object_db.rs +++ b/crates/core/src/db/ostorage/sled_object_db.rs @@ -17,7 +17,6 @@ impl SledObjectDB { .flush_every_ms(Some(50)) .mode(HighThroughput); let db = config.open()?; - Ok(Self { db }) } } @@ -48,6 +47,10 @@ impl ObjectDB for SledObjectDB { fn sync_all(&mut self) -> Result<(), DBError> { self.flush() } + + fn size_on_disk(&self) -> Result { + Ok(self.db.size_on_disk()?) + } } #[cfg(test)] diff --git a/crates/core/src/worker_metrics/mod.rs b/crates/core/src/worker_metrics/mod.rs index 9751d52f61..68caf0da41 100644 --- a/crates/core/src/worker_metrics/mod.rs +++ b/crates/core/src/worker_metrics/mod.rs @@ -1,6 +1,6 @@ use crate::util::typed_prometheus::metrics_group; use once_cell::sync::Lazy; -use prometheus::{Gauge, GaugeVec, HistogramVec, IntCounterVec, IntGaugeVec}; +use prometheus::{Gauge, GaugeVec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec}; use spacetimedb_lib::{Address, Hash, Identity}; metrics_group!( @@ -68,6 +68,22 @@ metrics_group!( #[help = "Length of the wait queue for access to a module instance."] #[labels(identity: Identity, module_hash: Hash, database_address: Address)] pub instance_queue_length: IntGaugeVec, + + #[name = spacetime_system_disk_space_total_bytes] + #[help = "A node's total disk space (in bytes)"] + pub system_disk_space_total: IntGauge, + + #[name = spacetime_system_disk_space_free_bytes] + #[help = "A node's free (unused) disk space (in bytes)"] + pub system_disk_space_free: IntGauge, + + #[name = spacetime_system_memory_total_bytes] + #[help = "A node's total available memory (in bytes)"] + pub system_memory_total: IntGauge, + + #[name = spacetime_system_memory_free_bytes] + #[help = "A node's current available (free) memory (in bytes)"] + pub system_memory_free: IntGauge, } ); diff --git a/crates/standalone/src/lib.rs b/crates/standalone/src/lib.rs index 2eb7129267..ff183048aa 100644 --- a/crates/standalone/src/lib.rs +++ b/crates/standalone/src/lib.rs @@ -163,6 +163,8 @@ fn get_key_path(env: &str) -> Option { #[async_trait] impl spacetimedb_client_api::NodeDelegate for StandaloneEnv { fn gather_metrics(&self) -> Vec { + // Note, we update certain metrics such as disk usage on demand. + self.db_inst_ctx_controller.update_metrics(); self.metrics_registry.gather() }