From 355fea994254dc3057f0d4f2007f73a4e79de8d0 Mon Sep 17 00:00:00 2001
From: EdvardD
Date: Thu, 14 Apr 2022 08:50:26 +0200
Subject: [PATCH] Parametrize max open files and col state cache size (#6584)

Create a `store` section in the neard config that passes various parameters
through to the store. Add `max_open_files` and ColState cache size
parameters, rework the read-only store setup, and fold the enable-statistics
flag into the new section.
---
 Cargo.lock                    |  1 +
 core/store/Cargo.toml         |  1 +
 core/store/src/db.rs          | 75 ++++++++++++++++++++---------------
 core/store/src/db/v6_to_v7.rs |  9 +++--
 core/store/src/lib.rs         | 71 ++++++++++++++++++++++++++++-----
 nearcore/benches/store.rs     |  8 ++--
 nearcore/src/config.rs        | 10 ++---
 nearcore/src/lib.rs           | 29 +++++---------
 tools/state-viewer/src/cli.rs |  4 +-
 9 files changed, 130 insertions(+), 78 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 63701a72ee9..b172b4ff412 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3182,6 +3182,7 @@ dependencies = [
  "once_cell",
  "rand 0.7.3",
  "rocksdb",
+ "serde",
  "serde_json",
  "strum",
  "tempfile",
diff --git a/core/store/Cargo.toml b/core/store/Cargo.toml
index 313d630dbd0..048084e4f00 100644
--- a/core/store/Cargo.toml
+++ b/core/store/Cargo.toml
@@ -14,6 +14,7 @@ bytesize = "1.1"
 derive_more = "0.99.3"
 elastic-array = "0.11"
 rocksdb = { version = "0.18.0", default-features = false, features = ["snappy", "lz4", "zstd", "zlib"] }
+serde = { version = "1", features = ["derive"] }
 serde_json = "1"
 num_cpus = "1.11"
 rand = "0.7"
diff --git a/core/store/src/db.rs b/core/store/src/db.rs
index 0427dd7ac37..a0c68df2446 100644
--- a/core/store/src/db.rs
+++ b/core/store/src/db.rs
@@ -1,3 +1,4 @@
+use super::StoreConfig;
 use crate::db::refcount::merge_refcounted_records;
 use crate::DBCol;
 use near_primitives::version::DbVersion;
@@ -104,12 +105,12 @@ unsafe impl Sync for RocksDB {}
 /// Options for configuring [`RocksDB`](RocksDB).
 ///
 /// ```rust
-/// use near_store::db::RocksDBOptions;
+/// use near_store::{db::RocksDBOptions, StoreConfig};
 ///
 /// let rocksdb = RocksDBOptions::default()
 ///     .check_free_space_interval(256)
 ///     .free_disk_space_threshold(bytesize::ByteSize::mb(10))
-///     .read_only("/db/path");
+///     .open("/db/path", &StoreConfig::read_only());
 /// ```
 pub struct RocksDBOptions {
     cf_names: Option<Vec<String>>,
@@ -119,7 +120,6 @@ pub struct RocksDBOptions {
     check_free_space_interval: u16,
     free_space_threshold: bytesize::ByteSize,
     warn_treshold: bytesize::ByteSize,
-    enable_statistics: bool,
 }

 /// Sets [`RocksDBOptions::check_free_space_interval`] to 256,
@@ -134,7 +134,6 @@ impl Default for RocksDBOptions {
     fn default() -> Self {
             check_free_space_interval: 256,
             free_space_threshold: bytesize::ByteSize::mb(16),
             warn_treshold: bytesize::ByteSize::mb(256),
-            enable_statistics: false,
         }
     }
 }
@@ -177,11 +176,26 @@ impl RocksDBOptions {
         self
     }

+    /// Opens the database in either read-only or read/write mode, depending on
+    /// the `read_only` flag of the given `store_config`.
+    pub fn open(
+        self,
+        path: impl AsRef<Path>,
+        store_config: &StoreConfig,
+    ) -> Result<RocksDB, DBError> {
+        let path = path.as_ref();
+        if store_config.read_only {
+            return self.read_only(path, &store_config);
+        }
+        self.read_write(path, &store_config)
+    }
+
     /// Opens a read only database.
-    pub fn read_only<P: AsRef<Path>>(self, path: P) -> Result<RocksDB, DBError> {
+    fn read_only(self, path: &Path, store_config: &StoreConfig) -> Result<RocksDB, DBError> {
         use strum::IntoEnumIterator;
-        let options = self.rocksdb_options.unwrap_or_else(rocksdb_options);
-        let cf_with_opts = DBCol::iter().map(|col| (col_name(col), rocksdb_column_options(col)));
+        let options = self.rocksdb_options.unwrap_or_else(|| rocksdb_options(store_config));
+        let cf_with_opts =
+            DBCol::iter().map(|col| (col_name(col), rocksdb_column_options(col, store_config)));
         let db = DB::open_cf_with_opts_for_read_only(&options, path, cf_with_opts, false)?;
         let cfs = DBCol::iter()
             .map(|col| db.cf_handle(&col_name(col)).unwrap() as *const ColumnFamily)
             .collect();
@@ -199,17 +213,22 @@ impl RocksDBOptions {
     }

     /// Opens the database in read/write mode.
-    pub fn read_write<P: AsRef<Path>>(self, path: P) -> Result<RocksDB, DBError> {
+    fn read_write(self, path: &Path, store_config: &StoreConfig) -> Result<RocksDB, DBError> {
         use strum::IntoEnumIterator;
-        let mut options = self.rocksdb_options.unwrap_or_else(rocksdb_options);
-        if self.enable_statistics {
+        let mut options = self.rocksdb_options.unwrap_or_else(|| rocksdb_options(store_config));
+        if store_config.enable_statistics {
             options = enable_statistics(options);
         }
         let cf_names =
             self.cf_names.unwrap_or_else(|| DBCol::iter().map(|col| col_name(col)).collect());
         let cf_descriptors = self.cf_descriptors.unwrap_or_else(|| {
             DBCol::iter()
-                .map(|col| ColumnFamilyDescriptor::new(col_name(col), rocksdb_column_options(col)))
+                .map(|col| {
+                    ColumnFamilyDescriptor::new(
+                        col_name(col),
+                        rocksdb_column_options(col, store_config),
+                    )
+                })
                 .collect()
         });
         let db = DB::open_cf_descriptors(&options, path, cf_descriptors)?;
@@ -234,11 +253,6 @@ impl RocksDBOptions {
             _instance_counter: InstanceCounter::new(),
         })
     }
-
-    pub fn enable_statistics(mut self) -> Self {
-        self.enable_statistics = true;
-        self
-    }
 }

 pub struct TestDB {
@@ -460,14 +474,14 @@ fn set_compression_options(opts: &mut Options) {
 }

 /// DB level options
-fn rocksdb_options() -> Options {
+fn rocksdb_options(store_config: &StoreConfig) -> Options {
     let mut opts = Options::default();

     set_compression_options(&mut opts);
     opts.create_missing_column_families(true);
     opts.create_if_missing(true);
     opts.set_use_fsync(false);
-    opts.set_max_open_files(512);
+    opts.set_max_open_files(store_config.max_open_files);
     opts.set_keep_log_file_num(1);
     opts.set_bytes_per_sync(bytesize::MIB);
     opts.set_write_buffer_size(256 * bytesize::MIB as usize);
@@ -517,19 +531,18 @@ fn rocksdb_block_based_options(cache_size: usize) -> BlockBasedOptions {
     block_opts
 }

-// TODO(#5213) Use ByteSize package to represent sizes.
-fn choose_cache_size(col: DBCol) -> usize {
+fn choose_cache_size(col: DBCol, store_config: &StoreConfig) -> usize {
     match col {
-        DBCol::ColState => 512 * 1024 * 1024,
+        DBCol::ColState => store_config.col_state_cache_size,
         _ => 32 * 1024 * 1024,
     }
 }

-fn rocksdb_column_options(col: DBCol) -> Options {
+fn rocksdb_column_options(col: DBCol, store_config: &StoreConfig) -> Options {
     let mut opts = Options::default();
     set_compression_options(&mut opts);
     opts.set_level_compaction_dynamic_level_bytes(true);
-    let cache_size = choose_cache_size(col);
+    let cache_size = choose_cache_size(col, &store_config);
     opts.set_block_based_table_factory(&rocksdb_block_based_options(cache_size));

     // Note that this function changes a lot of rustdb parameters including:
@@ -572,8 +585,8 @@ impl RocksDB {
     }

     /// Returns version of the database state on disk.
-    pub fn get_version<P: AsRef<Path>>(path: P) -> Result<DbVersion, DBError> {
-        let db = RocksDB::new_read_only(path)?;
+    pub fn get_version(path: &Path) -> Result<DbVersion, DBError> {
+        let db = RocksDB::new(path, &StoreConfig::read_only())?;
         db.get(DBCol::ColDbVersion, VERSION_KEY).map(|result| {
             serde_json::from_slice(
                 &result
@@ -583,12 +596,8 @@ impl RocksDB {
         })
     }

-    pub fn new_read_only<P: AsRef<Path>>(path: P) -> Result<Self, DBError> {
-        RocksDBOptions::default().read_only(path)
-    }
-
-    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, DBError> {
-        RocksDBOptions::default().read_write(path)
+    pub fn new(path: &Path, store_config: &StoreConfig) -> Result<Self, DBError> {
+        RocksDBOptions::default().open(path, &store_config)
     }

     /// Checks if there is enough memory left to perform a write. Not having enough memory left can
@@ -740,7 +749,7 @@ mod tests {
     use crate::db::DBCol::ColState;
     use crate::db::StatsValue::{Count, Percentile, Sum};
     use crate::db::{parse_statistics, rocksdb_read_options, DBError, Database, RocksDB};
-    use crate::{create_store, DBCol, StoreStatistics};
+    use crate::{create_store, DBCol, StoreConfig, StoreStatistics};

     impl RocksDB {
         #[cfg(not(feature = "single_thread_rocksdb"))]
@@ -767,7 +776,7 @@ mod tests {
     #[test]
     fn test_prewrite_check() {
         let tmp_dir = tempfile::Builder::new().prefix("_test_prewrite_check").tempdir().unwrap();
-        let store = RocksDB::new(tmp_dir).unwrap();
+        let store = RocksDB::new(tmp_dir.path(), &StoreConfig::read_write()).unwrap();
         store.pre_write_check().unwrap()
     }
diff --git a/core/store/src/db/v6_to_v7.rs b/core/store/src/db/v6_to_v7.rs
index 69b1cce7228..b09c42a91d7 100644
--- a/core/store/src/db/v6_to_v7.rs
+++ b/core/store/src/db/v6_to_v7.rs
@@ -5,7 +5,8 @@ use rocksdb::{ColumnFamilyDescriptor, MergeOperands, Options};
 use strum::IntoEnumIterator;

 use crate::db::{col_name, rocksdb_column_options, DBError, RocksDB, RocksDBOptions};
-use crate::DBCol;
+use crate::{DBCol, StoreConfig};
+use std::path::Path;

 fn refcount_merge_v6(
     _new_key: &[u8],
@@ -47,7 +48,7 @@ fn merge_refcounted_records_v6(result: &mut Vec<u8>, val: &[u8]) {
 }

 fn rocksdb_column_options_v6(col: DBCol) -> Options {
-    let mut opts = rocksdb_column_options(DBCol::ColDbVersion);
+    let mut opts = rocksdb_column_options(DBCol::ColDbVersion, &StoreConfig::read_write());

     if col == DBCol::ColState {
         opts.set_merge_operator("refcount merge", refcount_merge_v6, refcount_merge_v6);
@@ -57,7 +58,7 @@ fn rocksdb_column_options_v6(col: DBCol) -> Options {
 }

 impl RocksDB {
-    pub(crate) fn new_v6<P: AsRef<Path>>(path: P) -> Result<Self, DBError> {
+    pub(crate) fn new_v6(path: &Path) -> Result<Self, DBError> {
         RocksDBOptions::default()
             .cf_names(DBCol::iter().map(|col| col_name(col)).collect())
             .cf_descriptors(
@@ -67,6 +68,6 @@ impl RocksDB {
                 })
                 .collect(),
             )
-            .read_write(path)
+            .open(path, &StoreConfig::read_write())
     }
 }
diff --git a/core/store/src/lib.rs b/core/store/src/lib.rs
index a2556c62c2d..f42735c8b98 100644
--- a/core/store/src/lib.rs
+++ b/core/store/src/lib.rs
@@ -1,3 +1,4 @@
+use serde::{Deserialize, Serialize};
 use std::fs::File;
 use std::io::{BufReader, BufWriter, Read, Write};
 use std::path::Path;
@@ -288,27 +289,79 @@ pub fn read_with_cache<'a, T: BorshDeserialize + 'a>(
     Ok(None)
 }

-#[derive(Default, Debug)]
+#[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct StoreConfig {
     /// Attempted writes to the DB will fail. Doesn't require a `LOCK` file.
+    #[serde(skip)]
     pub read_only: bool,
+
     /// Re-export storage layer statistics as prometheus metrics.
     /// Minor performance impact is expected.
+    #[serde(default)]
     pub enable_statistics: bool,
+
+    /// Maximum number of store files kept open simultaneously.
+    /// Default value: 512.
+    /// The underlying storage may need to open a large number of files simultaneously.
+    /// Increasing this value helps prevent the storage from constantly closing and
+    /// reopening the files it needs.
+    /// Raising it above 1024 also requires raising the open-file limit (`ulimit -n`)
+    /// on Linux.
+    #[serde(default = "default_max_open_files")]
+    pub max_open_files: i32,
+
+    /// Cache size for ColState column.
+    /// Default value: 512MiB.
+    /// Increasing the ColState cache size makes storage more efficient. On the other hand, we
+    /// don't want to hugely increase the requirements for running a node, so we currently keep
+    /// the default value small.
+    #[serde(default = "default_col_state_cache_size")]
+    pub col_state_cache_size: usize,
 }

-pub fn create_store(path: &Path) -> Store {
-    create_store_with_config(path, StoreConfig::default())
+fn default_max_open_files() -> i32 {
+    StoreConfig::DEFAULT_MAX_OPEN_FILES
 }

-pub fn create_store_with_config(path: &Path, store_config: StoreConfig) -> Store {
-    let mut opts = RocksDBOptions::default();
-    if store_config.enable_statistics {
-        opts = opts.enable_statistics();
+fn default_col_state_cache_size() -> usize {
+    StoreConfig::DEFAULT_COL_STATE_CACHE_SIZE
+}
+
+impl StoreConfig {
+    /// This is a value that we've used since 3 Dec 2019.
+    pub const DEFAULT_MAX_OPEN_FILES: i32 = 512;
+
+    /// We used to use the same 32MB cache size for all columns. After some RocksDB
+    /// inefficiencies were found, the ColState cache size was increased to 512MB.
+    /// This was done on Nov 13 2021, and we are considering increasing it further.
+    pub const DEFAULT_COL_STATE_CACHE_SIZE: usize = 512 * bytesize::MIB as usize;
+
+    pub fn read_only() -> StoreConfig {
+        StoreConfig::read_write().with_read_only(true)
     }
-    let db = if store_config.read_only { opts.read_only(path) } else { opts.read_write(path) }
-        .expect("Failed to open the database");

+    pub fn read_write() -> StoreConfig {
+        StoreConfig {
+            read_only: false,
+            enable_statistics: false,
+            max_open_files: default_max_open_files(),
+            col_state_cache_size: default_col_state_cache_size(),
+        }
+    }
+
+    pub fn with_read_only(mut self, read_only: bool) -> Self {
+        self.read_only = read_only;
+        self
+    }
+}
+
+pub fn create_store(path: &Path) -> Store {
+    create_store_with_config(path, &StoreConfig::read_write())
+}
+
+pub fn create_store_with_config(path: &Path, store_config: &StoreConfig) -> Store {
+    let db =
+        RocksDBOptions::default().open(path, &store_config).expect("Failed to open the database");
     Store::new(Arc::new(db))
 }
diff --git a/nearcore/benches/store.rs b/nearcore/benches/store.rs
index 4d2b9ed7bbf..6a2cec8ffb5 100644
--- a/nearcore/benches/store.rs
+++ b/nearcore/benches/store.rs
@@ -18,7 +18,7 @@ use std::time::{Duration, Instant};
 /// took on avg 6.169248ms op per sec 162 items read 10000
 /// took on avg 1.424615ms op per sec 701 items read 10000
 /// took on avg 1.416562ms op per sec 705 items read 10000
-/// ```
+/// ```
 fn read_trie_items(bench: &mut Bencher, shard_id: usize, read_only: bool) {
     init_integration_logger();
     let home_dir = get_default_home();
@@ -28,10 +28,8 @@ fn read_trie_items(bench: &mut Bencher, shard_id: usize, read_only: bool) {

     bench.iter(move || {
         tracing::info!(target: "neard", "{:?}", home_dir);
-        let store = create_store_with_config(
-            &get_store_path(&home_dir),
-            StoreConfig { read_only, enable_statistics: false },
-        );
+        let store_config = StoreConfig::read_write().with_read_only(read_only);
+        let store = create_store_with_config(&get_store_path(&home_dir), &store_config);
         let mut chain_store =
            ChainStore::new(store.clone(), near_config.genesis.config.genesis_height, true);
diff --git a/nearcore/src/config.rs b/nearcore/src/config.rs
index f5537d13032..0ca9b5586c5 100644
--- a/nearcore/src/config.rs
+++ b/nearcore/src/config.rs
@@ -330,10 +330,6 @@ fn default_use_checkpoints_for_db_migration() -> bool {
     true
 }

-fn default_enable_rocksdb_statistics() -> bool {
-    false
-}
-
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct Consensus {
     /// Minimum number of peers to start syncing.
@@ -457,8 +453,8 @@ pub struct Config {
     /// For example, setting "use_db_migration_snapshot" to "/tmp/" will create a directory "/tmp/db_migration_snapshot" and populate it with the database files.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub db_migration_snapshot_path: Option<PathBuf>,
-    #[serde(default = "default_enable_rocksdb_statistics")]
-    pub enable_rocksdb_statistics: bool,
+    /// Various parameters that configure and optimize the underlying storage.
+    pub store: near_store::StoreConfig,
 }

 impl Default for Config {
@@ -487,7 +483,7 @@ impl Default for Config {
             max_gas_burnt_view: None,
             db_migration_snapshot_path: None,
             use_db_migration_snapshot: true,
-            enable_rocksdb_statistics: false,
+            store: near_store::StoreConfig::read_write(),
         }
     }
 }
diff --git a/nearcore/src/lib.rs b/nearcore/src/lib.rs
index 3637df1c4f1..399c6c3e68f 100644
--- a/nearcore/src/lib.rs
+++ b/nearcore/src/lib.rs
@@ -29,7 +29,7 @@ use near_store::migrations::{
     migrate_6_to_7, migrate_7_to_8, migrate_8_to_9, migrate_9_to_10, set_store_version,
 };
 use near_store::DBCol;
-use near_store::{create_store, create_store_with_config, Store, StoreConfig};
+use near_store::{create_store, create_store_with_config, Store};
 use near_telemetry::TelemetryActor;
 use std::fs;
 use std::path::{Path, PathBuf};
@@ -105,7 +105,7 @@ fn create_db_checkpoint(path: &Path, near_config: &NearConfig) -> Result<
     if db_version > near_primitives::version::DB_VERSION {
         error!(target: "near", "DB version {} is created by a newer version of neard, please update neard or delete data", db_version);
         std::process::exit(1);
@@ -338,7 +338,7 @@ pub fn apply_store_migrations(path: &Path, near_config: &NearConfig) {

     #[cfg(not(feature = "nightly_protocol"))]
     {
-        let db_version = get_store_version(path);
+        let db_version = get_store_version(&path);
         debug_assert_eq!(db_version, near_primitives::version::DB_VERSION);
     }

@@ -368,13 +368,8 @@ pub fn init_and_migrate_store(home_dir: &Path, near_config: &NearConfig) -> Stor
     if store_exists {
         apply_store_migrations(&path, near_config);
     }
-    let store = create_store_with_config(
-        &path,
-        StoreConfig {
-            read_only: false,
-            enable_statistics: near_config.config.enable_rocksdb_statistics,
-        },
-    );
+    let store =
+        create_store_with_config(&path, &near_config.config.store.clone().with_read_only(false));
     if !store_exists {
         set_store_version(&store, near_primitives::version::DB_VERSION);
     }
@@ -518,9 +513,9 @@ pub fn recompress_storage(home_dir: &Path, opts: RecompressOpts) -> anyhow::Resu
     use strum::IntoEnumIterator;

     let config_path = home_dir.join(config::CONFIG_FILENAME);
-    let archive = config::Config::from_file(&config_path)
-        .map_err(|err| anyhow::anyhow!("{}: {}", config_path.display(), err))?
-        .archive;
+    let config = config::Config::from_file(&config_path)
+        .map_err(|err| anyhow::anyhow!("{}: {}", config_path.display(), err))?;
+    let archive = config.archive;
     let mut skip_columns = Vec::new();
     if archive && !opts.keep_partial_chunks {
         skip_columns.push(near_store::DBCol::ColPartialChunks);
@@ -552,6 +547,7 @@ pub fn recompress_storage(home_dir: &Path, opts: RecompressOpts) -> anyhow::Resu
         "{}: source storage doesn’t exist",
         src_dir.display()
     );
+    let store_config = config.store.with_read_only(true);
     let db_version = get_store_version(&src_dir);
     anyhow::ensure!(
         db_version == near_primitives::version::DB_VERSION,
@@ -568,10 +564,7 @@ pub fn recompress_storage(home_dir: &Path, opts: RecompressOpts) -> anyhow::Resu
     );

     info!(target: "recompress", src = %src_dir.display(), dest = %opts.dest_dir.display(), "Recompressing database");
-    let src_store = create_store_with_config(
-        &src_dir,
-        StoreConfig { read_only: true, enable_statistics: false },
-    );
+    let src_store = create_store_with_config(&src_dir, &store_config);

     let final_head_height = if skip_columns.contains(&DBCol::ColPartialChunks) {
         let tip: Option<Tip> =
diff --git a/tools/state-viewer/src/cli.rs b/tools/state-viewer/src/cli.rs
index 321c97ee68f..3c147048b41 100644
--- a/tools/state-viewer/src/cli.rs
+++ b/tools/state-viewer/src/cli.rs
@@ -7,7 +7,7 @@ use near_primitives::account::id::AccountId;
 use near_primitives::hash::CryptoHash;
 use near_primitives::sharding::ChunkHash;
 use near_primitives::types::{BlockHeight, ShardId};
-use near_store::{create_store_with_config, Store, StoreConfig};
+use near_store::{create_store_with_config, Store};
 use nearcore::{get_store_path, load_config, NearConfig};
 use std::path::{Path, PathBuf};
 use std::str::FromStr;
@@ -78,7 +78,7 @@ impl StateViewerSubCommand {
         let near_config = load_config(home_dir, genesis_validation)
             .unwrap_or_else(|e| panic!("Error loading config: {:#}", e));
         let store_path = get_store_path(home_dir);
-        let store_config = StoreConfig { read_only: !readwrite, ..StoreConfig::default() };
+        let store_config = &near_config.config.store.clone().with_read_only(!readwrite);
         let store = create_store_with_config(&store_path, store_config);
         match self {
             StateViewerSubCommand::Peers => peers(store),
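For reviewers: a minimal, self-contained sketch of how the new `store` section of `config.json` deserializes under the serde attributes this patch adds to `StoreConfig`. The mirror struct, the sample JSON values, and `main` below are illustrative only and not part of the patch; the field names and defaults are taken from `core/store/src/lib.rs` above (`read_only` is `#[serde(skip)]`, so it never appears in the config file). It assumes only the `serde`, `serde_json`, and `bytesize` crates the patched crate already depends on.

```rust
use serde::Deserialize;

// Illustrative mirror of the StoreConfig fields introduced above, with the
// same serde attributes. `read_only` is skipped during (de)serialization,
// so it is omitted here entirely.
#[derive(Deserialize, Debug)]
struct StoreConfig {
    // Missing in JSON -> bool::default() == false.
    #[serde(default)]
    enable_statistics: bool,
    // Missing in JSON -> the named default function is called.
    #[serde(default = "default_max_open_files")]
    max_open_files: i32,
    #[serde(default = "default_col_state_cache_size")]
    col_state_cache_size: usize,
}

fn default_max_open_files() -> i32 {
    512 // StoreConfig::DEFAULT_MAX_OPEN_FILES
}

fn default_col_state_cache_size() -> usize {
    512 * bytesize::MIB as usize // StoreConfig::DEFAULT_COL_STATE_CACHE_SIZE
}

fn main() {
    // A hypothetical `store` section: raise the open-file budget and double
    // the ColState cache. Omitted fields fall back to the serde defaults.
    let raw = r#"{ "max_open_files": 2048, "col_state_cache_size": 1073741824 }"#;
    let cfg: StoreConfig = serde_json::from_str(raw).unwrap();
    assert_eq!(cfg.max_open_files, 2048);
    assert_eq!(cfg.col_state_cache_size, 1024 * 1024 * 1024);
    assert!(!cfg.enable_statistics); // default applies
    println!("parsed store section: {:?}", cfg);
}
```

Note the design choice this illustrates: `read_only` is deliberately a runtime-only flag with no config-file representation. Callers pick the mode via `StoreConfig::read_only()` or `with_read_only(..)`, as the state-viewer and `recompress_storage` hunks above do, so a node's `config.json` can never accidentally pin the database into read-only mode.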