diff --git a/Cargo.lock b/Cargo.lock index 08948ffbd970b..214c4f72c7d0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4665,6 +4665,20 @@ dependencies = [ "memoffset 0.6.5", ] +[[package]] +name = "nix" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46a58d1d356c6597d08cde02c2f09d785b09e28711837b1ed667dc652c08a694" +dependencies = [ + "bitflags", + "cfg-if", + "libc", + "memoffset 0.7.1", + "pin-utils", + "static_assertions", +] + [[package]] name = "node-bench" version = "0.9.0-dev" @@ -4752,6 +4766,7 @@ dependencies = [ "sc-rpc", "sc-service", "sc-service-test", + "sc-storage-monitor", "sc-sync-state-rpc", "sc-sysinfo", "sc-telemetry", @@ -8895,6 +8910,7 @@ dependencies = [ "sc-rpc", "sc-rpc-server", "sc-rpc-spec-v2", + "sc-storage-monitor", "sc-sysinfo", "sc-telemetry", "sc-tracing", @@ -8973,6 +8989,21 @@ dependencies = [ "sp-core", ] +[[package]] +name = "sc-storage-monitor" +version = "0.1.0" +dependencies = [ + "clap 4.0.32", + "futures", + "log", + "nix 0.26.1", + "sc-client-db", + "sc-utils", + "sp-core", + "thiserror", + "tokio", +] + [[package]] name = "sc-sync-state-rpc" version = "0.10.0-dev" diff --git a/Cargo.toml b/Cargo.toml index 8f55d8e527ecd..f7eedadd1e8d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,6 +64,7 @@ members = [ "client/service", "client/service/test", "client/state-db", + "client/storage-monitor", "client/sysinfo", "client/sync-state-rpc", "client/telemetry", diff --git a/bin/node/cli/Cargo.toml b/bin/node/cli/Cargo.toml index 4d9279b857eed..8a883ee0119ec 100644 --- a/bin/node/cli/Cargo.toml +++ b/bin/node/cli/Cargo.toml @@ -81,6 +81,7 @@ sc-executor = { version = "0.10.0-dev", path = "../../../client/executor" } sc-authority-discovery = { version = "0.10.0-dev", path = "../../../client/authority-discovery" } sc-sync-state-rpc = { version = "0.10.0-dev", path = "../../../client/sync-state-rpc" } sc-sysinfo = { version = "6.0.0-dev", path = "../../../client/sysinfo" } +sc-storage-monitor = { version = "0.1.0", path = "../../../client/storage-monitor" } # frame dependencies frame-system = { version = "4.0.0-dev", path = "../../../frame/system" } @@ -138,6 +139,7 @@ substrate-frame-cli = { version = "4.0.0-dev", optional = true, path = "../../.. try-runtime-cli = { version = "0.10.0-dev", optional = true, path = "../../../utils/frame/try-runtime/cli" } sc-cli = { version = "0.10.0-dev", path = "../../../client/cli", optional = true } pallet-balances = { version = "4.0.0-dev", path = "../../../frame/balances" } +sc-storage-monitor = { version = "0.1.0", path = "../../../client/storage-monitor" } [features] default = ["cli"] diff --git a/bin/node/cli/src/cli.rs b/bin/node/cli/src/cli.rs index bb7f8a4c60aa9..7bea336c8e41a 100644 --- a/bin/node/cli/src/cli.rs +++ b/bin/node/cli/src/cli.rs @@ -36,6 +36,10 @@ pub struct Cli { /// telemetry, if telemetry is enabled. #[arg(long)] pub no_hardware_benchmarks: bool, + + #[allow(missing_docs)] + #[clap(flatten)] + pub storage_monitor: sc_storage_monitor::StorageMonitorParams, } /// Possible subcommands of the main binary. diff --git a/bin/node/cli/src/command.rs b/bin/node/cli/src/command.rs index fd464bbc914a5..2ed8a2c754018 100644 --- a/bin/node/cli/src/command.rs +++ b/bin/node/cli/src/command.rs @@ -87,8 +87,7 @@ pub fn run() -> Result<()> { None => { let runner = cli.create_runner(&cli.run)?; runner.run_node_until_exit(|config| async move { - service::new_full(config, cli.no_hardware_benchmarks) - .map_err(sc_cli::Error::Service) + service::new_full(config, cli).map_err(sc_cli::Error::Service) }) }, Some(Subcommand::Inspect(cmd)) => { diff --git a/bin/node/cli/src/service.rs b/bin/node/cli/src/service.rs index d77a333dfa220..e329087947c98 100644 --- a/bin/node/cli/src/service.rs +++ b/bin/node/cli/src/service.rs @@ -20,6 +20,7 @@ //! Service implementation. Specialized wrapper over substrate service. +use crate::Cli; use codec::Encode; use frame_benchmarking_cli::SUBSTRATE_REFERENCE_HARDWARE; use frame_system_rpc_runtime_api::AccountNonceApi; @@ -556,12 +557,18 @@ pub fn new_full_base( } /// Builds a new service for a full client. -pub fn new_full( - config: Configuration, - disable_hardware_benchmarks: bool, -) -> Result { - new_full_base(config, disable_hardware_benchmarks, |_, _| ()) - .map(|NewFullBase { task_manager, .. }| task_manager) +pub fn new_full(config: Configuration, cli: Cli) -> Result { + let database_source = config.database.clone(); + let task_manager = new_full_base(config, cli.no_hardware_benchmarks, |_, _| ()) + .map(|NewFullBase { task_manager, .. }| task_manager)?; + + sc_storage_monitor::StorageMonitorService::try_spawn( + cli.storage_monitor, + database_source, + &task_manager.spawn_essential_handle(), + )?; + + Ok(task_manager) } #[cfg(test)] diff --git a/client/cli/src/params/database_params.rs b/client/cli/src/params/database_params.rs index fdd3622580a6d..06a154fd60867 100644 --- a/client/cli/src/params/database_params.rs +++ b/client/cli/src/params/database_params.rs @@ -19,7 +19,7 @@ use crate::arg_enums::Database; use clap::Args; -/// Parameters for block import. +/// Parameters for database #[derive(Debug, Clone, PartialEq, Args)] pub struct DatabaseParams { /// Select database backend to use. @@ -32,7 +32,7 @@ pub struct DatabaseParams { } impl DatabaseParams { - /// Limit the memory the database cache can use. + /// Database backend pub fn database(&self) -> Option { self.database } diff --git a/client/service/Cargo.toml b/client/service/Cargo.toml index 6122895d2300c..b4ce3bbbb7f1c 100644 --- a/client/service/Cargo.toml +++ b/client/service/Cargo.toml @@ -73,6 +73,7 @@ sc-offchain = { version = "4.0.0-dev", path = "../offchain" } prometheus-endpoint = { package = "substrate-prometheus-endpoint", path = "../../utils/prometheus", version = "0.10.0-dev" } sc-tracing = { version = "4.0.0-dev", path = "../tracing" } sc-sysinfo = { version = "6.0.0-dev", path = "../sysinfo" } +sc-storage-monitor = { version = "0.1.0", path = "../storage-monitor" } tracing = "0.1.29" tracing-futures = { version = "0.2.4" } async-trait = "0.1.57" diff --git a/client/service/src/error.rs b/client/service/src/error.rs index 001a83922d776..ec2951193964c 100644 --- a/client/service/src/error.rs +++ b/client/service/src/error.rs @@ -48,6 +48,9 @@ pub enum Error { #[error(transparent)] Telemetry(#[from] sc_telemetry::Error), + #[error(transparent)] + Storage(#[from] sc_storage_monitor::Error), + #[error("Best chain selection strategy (SelectChain) is not provided.")] SelectChainRequired, diff --git a/client/storage-monitor/Cargo.toml b/client/storage-monitor/Cargo.toml new file mode 100644 index 0000000000000..2ba24f9e2e718 --- /dev/null +++ b/client/storage-monitor/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "sc-storage-monitor" +version = "0.1.0" +authors = ["Parity Technologies "] +edition = "2021" +license = "GPL-3.0-or-later WITH Classpath-exception-2.0" +repository = "https://github.com/paritytech/substrate" +description = "Storage monitor service for substrate" +homepage = "https://substrate.io" + +[dependencies] +clap = { version = "4.0.9", features = ["derive", "string"] } +futures = "0.3.21" +log = "0.4.17" +nix = { version = "0.26.1", features = ["fs"] } +sc-client-db = { version = "0.10.0-dev", default-features = false, path = "../db" } +sc-utils = { version = "4.0.0-dev", path = "../utils" } +sp-core = { version = "7.0.0", path = "../../primitives/core" } +tokio = "1.22.0" +thiserror = "1.0.30" diff --git a/client/storage-monitor/src/lib.rs b/client/storage-monitor/src/lib.rs new file mode 100644 index 0000000000000..39bd15675b350 --- /dev/null +++ b/client/storage-monitor/src/lib.rs @@ -0,0 +1,149 @@ +// This file is part of Substrate. + +// Copyright (C) 2022 Parity Technologies (UK) Ltd. +// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0 + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use clap::Args; +use nix::{errno::Errno, sys::statvfs::statvfs}; +use sc_client_db::DatabaseSource; +use sp_core::traits::SpawnEssentialNamed; +use std::{ + path::{Path, PathBuf}, + time::Duration, +}; + +const LOG_TARGET: &str = "storage-monitor"; + +/// Error type used in this crate. +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("IO Error")] + IOError(#[from] Errno), + #[error("Out of storage space: available {0}MB, required {1}MB")] + StorageOutOfSpace(u64, u64), +} + +/// Parameters used to create the storage monitor. +#[derive(Default, Debug, Clone, Args)] +pub struct StorageMonitorParams { + /// Required available space on database storage. If available space for DB storage drops below + /// the given threshold, node will be gracefully terminated. If `0` is given monitoring will be + /// disabled. + #[arg(long = "db-storage-threshold", value_name = "MB", default_value_t = 1000)] + pub threshold: u64, + + /// How often available space is polled. + #[arg(long = "db-storage-polling-period", value_name = "SECONDS", default_value_t = 5, value_parser = clap::value_parser!(u32).range(1..))] + pub polling_period: u32, +} + +/// Storage monitor service: checks the available space for the filesystem for fiven path. +pub struct StorageMonitorService { + /// watched path + path: PathBuf, + /// number of megabytes that shall be free on the filesystem for watched path + threshold: u64, + /// storage space polling period (seconds) + polling_period: u32, +} + +impl StorageMonitorService { + /// Creates new StorageMonitorService for given client config + pub fn try_spawn( + parameters: StorageMonitorParams, + database: DatabaseSource, + spawner: &impl SpawnEssentialNamed, + ) -> Result<(), Error> { + Ok(match (parameters.threshold, database.path()) { + (0, _) => { + log::info!( + target: LOG_TARGET, + "StorageMonitorService: threshold `0` given, storage monitoring disabled", + ); + }, + (_, None) => { + log::warn!( + target: LOG_TARGET, + "StorageMonitorService: no database path to observe", + ); + }, + (threshold, Some(path)) => { + log::debug!( + target: LOG_TARGET, + "Initializing StorageMonitorService for db path: {:?}", + path, + ); + + Self::check_free_space(&path, threshold)?; + + let storage_monitor_service = StorageMonitorService { + path: path.to_path_buf(), + threshold, + polling_period: parameters.polling_period, + }; + + spawner.spawn_essential( + "storage-monitor", + None, + Box::pin(storage_monitor_service.run()), + ); + }, + }) + } + + /// Main monitoring loop, intended to be spawned as essential task. Quits if free space drop + /// below threshold. + async fn run(self) { + loop { + tokio::time::sleep(Duration::from_secs(self.polling_period.into())).await; + if Self::check_free_space(&self.path, self.threshold).is_err() { + break + }; + } + } + + /// Returns free space in MB, or error if statvfs failed. + fn free_space(path: &Path) -> Result { + statvfs(path) + .map(|stats| stats.blocks_available() * stats.block_size() / 1_000_000) + .map_err(Error::from) + } + + /// Checks if the amount of free space for given `path` is above given `threshold`. + /// If it dropped below, error is returned. + /// System errors are silently ignored. + fn check_free_space(path: &Path, threshold: u64) -> Result<(), Error> { + match StorageMonitorService::free_space(path) { + Ok(available_space) => { + log::trace!( + target: LOG_TARGET, + "free: {available_space} , threshold: {threshold}.", + ); + + if available_space < threshold { + log::error!(target: LOG_TARGET, "Available space {available_space}MB for path `{}` dropped below threshold: {threshold}MB , terminating...", path.display()); + Err(Error::StorageOutOfSpace(available_space, threshold)) + } else { + Ok(()) + } + }, + Err(e) => { + log::error!(target: LOG_TARGET, "Could not read available space: {:?}.", e); + Err(e) + }, + } + } +}