Parametrize max open files and col state cache size (#6584)
Create a store section in the neard config to pass various parameters to the store;
add max_open_files and ColState cache size parameters;
rework the read_only store and enable_statistics parameters.
EdvardD authored and pompon0 committed Apr 15, 2022
1 parent 0022fb2 commit 355fea9
Showing 9 changed files with 130 additions and 78 deletions.
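For context, here is a minimal sketch of how the new `store` section deserializes (field names and defaults are taken from the `StoreConfig` struct in this diff; the standalone `main` is illustrative, and `serde_json` is the dependency this diff adds to `core/store/Cargo.toml`):

```rust
use near_store::StoreConfig;

fn main() {
    // Hypothetical `store` section of config.json; the values shown are the
    // defaults described below (512 open files, 512 MiB ColState cache).
    let raw = r#"{
        "enable_statistics": false,
        "max_open_files": 512,
        "col_state_cache_size": 536870912
    }"#;
    let store: StoreConfig = serde_json::from_str(raw).expect("valid store config");
    assert_eq!(store.max_open_files, 512);
    // `read_only` is #[serde(skip)], so it is never read from the file and
    // falls back to false.
    assert!(!store.read_only);
}
```

Omitted fields fall back to their `serde(default = ...)` functions, so an empty `"store": {}` section should also parse.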
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions core/store/Cargo.toml
@@ -14,6 +14,7 @@ bytesize = "1.1"
derive_more = "0.99.3"
elastic-array = "0.11"
rocksdb = { version = "0.18.0", default-features = false, features = ["snappy", "lz4", "zstd", "zlib"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
num_cpus = "1.11"
rand = "0.7"
75 changes: 42 additions & 33 deletions core/store/src/db.rs
@@ -1,3 +1,4 @@
use super::StoreConfig;
use crate::db::refcount::merge_refcounted_records;
use crate::DBCol;
use near_primitives::version::DbVersion;
@@ -104,12 +105,12 @@ unsafe impl Sync for RocksDB {}
/// Options for configuring [`RocksDB`](RocksDB).
///
/// ```rust
/// use near_store::db::RocksDBOptions;
/// use near_store::{db::RocksDBOptions, StoreConfig};
///
/// let rocksdb = RocksDBOptions::default()
/// .check_free_space_interval(256)
/// .free_disk_space_threshold(bytesize::ByteSize::mb(10))
/// .read_only("/db/path");
/// .open("/db/path", &StoreConfig::read_only());
/// ```
pub struct RocksDBOptions {
cf_names: Option<Vec<String>>,
@@ -119,7 +120,6 @@ pub struct RocksDBOptions {
check_free_space_interval: u16,
free_space_threshold: bytesize::ByteSize,
warn_treshold: bytesize::ByteSize,
enable_statistics: bool,
}

/// Sets [`RocksDBOptions::check_free_space_interval`] to 256,
@@ -134,7 +134,6 @@ impl Default for RocksDBOptions {
check_free_space_interval: 256,
free_space_threshold: bytesize::ByteSize::mb(16),
warn_treshold: bytesize::ByteSize::mb(256),
enable_statistics: false,
}
}
}
@@ -177,11 +176,26 @@ impl RocksDBOptions {
self
}

/// Opens the database in read-only or read/write mode, depending on the `read_only`
/// flag in the given `store_config`.
pub fn open(
self,
path: impl AsRef<Path>,
store_config: &StoreConfig,
) -> Result<RocksDB, DBError> {
let path = path.as_ref();
if store_config.read_only {
return self.read_only(path, &store_config);
}
self.read_write(path, &store_config)
}

/// Opens a read only database.
pub fn read_only<P: AsRef<std::path::Path>>(self, path: P) -> Result<RocksDB, DBError> {
fn read_only(self, path: &Path, store_config: &StoreConfig) -> Result<RocksDB, DBError> {
use strum::IntoEnumIterator;
let options = self.rocksdb_options.unwrap_or_else(rocksdb_options);
let cf_with_opts = DBCol::iter().map(|col| (col_name(col), rocksdb_column_options(col)));
let options = self.rocksdb_options.unwrap_or_else(|| rocksdb_options(store_config));
let cf_with_opts =
DBCol::iter().map(|col| (col_name(col), rocksdb_column_options(col, store_config)));
let db = DB::open_cf_with_opts_for_read_only(&options, path, cf_with_opts, false)?;
let cfs = DBCol::iter()
.map(|col| db.cf_handle(&col_name(col)).unwrap() as *const ColumnFamily)
@@ -199,17 +213,22 @@ impl RocksDBOptions {
}

/// Opens the database in read/write mode.
pub fn read_write<P: AsRef<std::path::Path>>(self, path: P) -> Result<RocksDB, DBError> {
fn read_write(self, path: &Path, store_config: &StoreConfig) -> Result<RocksDB, DBError> {
use strum::IntoEnumIterator;
let mut options = self.rocksdb_options.unwrap_or_else(rocksdb_options);
if self.enable_statistics {
let mut options = self.rocksdb_options.unwrap_or_else(|| rocksdb_options(store_config));
if store_config.enable_statistics {
options = enable_statistics(options);
}
let cf_names =
self.cf_names.unwrap_or_else(|| DBCol::iter().map(|col| col_name(col)).collect());
let cf_descriptors = self.cf_descriptors.unwrap_or_else(|| {
DBCol::iter()
.map(|col| ColumnFamilyDescriptor::new(col_name(col), rocksdb_column_options(col)))
.map(|col| {
ColumnFamilyDescriptor::new(
col_name(col),
rocksdb_column_options(col, store_config),
)
})
.collect()
});
let db = DB::open_cf_descriptors(&options, path, cf_descriptors)?;
@@ -234,11 +253,6 @@ impl RocksDBOptions {
_instance_counter: InstanceCounter::new(),
})
}

pub fn enable_statistics(mut self) -> Self {
self.enable_statistics = true;
self
}
}

pub struct TestDB {
@@ -460,14 +474,14 @@ fn set_compression_options(opts: &mut Options) {
}

/// DB level options
fn rocksdb_options() -> Options {
fn rocksdb_options(store_config: &StoreConfig) -> Options {
let mut opts = Options::default();

set_compression_options(&mut opts);
opts.create_missing_column_families(true);
opts.create_if_missing(true);
opts.set_use_fsync(false);
opts.set_max_open_files(512);
opts.set_max_open_files(store_config.max_open_files);
opts.set_keep_log_file_num(1);
opts.set_bytes_per_sync(bytesize::MIB);
opts.set_write_buffer_size(256 * bytesize::MIB as usize);
@@ -517,19 +531,18 @@ fn rocksdb_block_based_options(cache_size: usize) -> BlockBasedOptions {
block_opts
}

// TODO(#5213) Use ByteSize package to represent sizes.
fn choose_cache_size(col: DBCol) -> usize {
fn choose_cache_size(col: DBCol, store_config: &StoreConfig) -> usize {
match col {
DBCol::ColState => 512 * 1024 * 1024,
DBCol::ColState => store_config.col_state_cache_size,
_ => 32 * 1024 * 1024,
}
}

fn rocksdb_column_options(col: DBCol) -> Options {
fn rocksdb_column_options(col: DBCol, store_config: &StoreConfig) -> Options {
let mut opts = Options::default();
set_compression_options(&mut opts);
opts.set_level_compaction_dynamic_level_bytes(true);
let cache_size = choose_cache_size(col);
let cache_size = choose_cache_size(col, &store_config);
opts.set_block_based_table_factory(&rocksdb_block_based_options(cache_size));

// Note that this function changes a lot of rustdb parameters including:
@@ -572,8 +585,8 @@ impl RocksDB {
}

/// Returns version of the database state on disk.
pub fn get_version<P: AsRef<std::path::Path>>(path: P) -> Result<DbVersion, DBError> {
let db = RocksDB::new_read_only(path)?;
pub fn get_version(path: &Path) -> Result<DbVersion, DBError> {
let db = RocksDB::new(path, &StoreConfig::read_only())?;
db.get(DBCol::ColDbVersion, VERSION_KEY).map(|result| {
serde_json::from_slice(
&result
@@ -583,12 +596,8 @@
})
}

pub fn new_read_only<P: AsRef<std::path::Path>>(path: P) -> Result<Self, DBError> {
RocksDBOptions::default().read_only(path)
}

pub fn new<P: AsRef<std::path::Path>>(path: P) -> Result<Self, DBError> {
RocksDBOptions::default().read_write(path)
pub fn new(path: &Path, store_config: &StoreConfig) -> Result<Self, DBError> {
RocksDBOptions::default().open(path, &store_config)
}

/// Checks if there is enough memory left to perform a write. Not having enough memory left can
@@ -740,7 +749,7 @@ mod tests {
use crate::db::DBCol::ColState;
use crate::db::StatsValue::{Count, Percentile, Sum};
use crate::db::{parse_statistics, rocksdb_read_options, DBError, Database, RocksDB};
use crate::{create_store, DBCol, StoreStatistics};
use crate::{create_store, DBCol, StoreConfig, StoreStatistics};

impl RocksDB {
#[cfg(not(feature = "single_thread_rocksdb"))]
@@ -767,7 +776,7 @@
#[test]
fn test_prewrite_check() {
let tmp_dir = tempfile::Builder::new().prefix("_test_prewrite_check").tempdir().unwrap();
let store = RocksDB::new(tmp_dir).unwrap();
let store = RocksDB::new(tmp_dir.path(), &StoreConfig::read_write()).unwrap();
store.pre_write_check().unwrap()
}

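Putting the reworked entry points together, a hedged usage sketch (the `near_store::db` paths follow the rustdoc example above; that `DBError` is re-exported from that module is an assumption):

```rust
use near_store::db::{DBError, RocksDB};
use near_store::StoreConfig;
use std::path::Path;

fn open_for_inspection(path: &Path) -> Result<(), DBError> {
    // Read-only open: attempted writes fail and no LOCK file is required.
    let ro = RocksDB::new(path, &StoreConfig::read_only())?;
    drop(ro);

    // Read/write open with a larger file-handle budget than the 512 default.
    // Anything above 1024 open files also needs a matching `ulimit -n` on Linux.
    let mut config = StoreConfig::read_write();
    config.max_open_files = 2048;
    let _rw = RocksDB::new(path, &config)?;
    Ok(())
}
```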
9 changes: 5 additions & 4 deletions core/store/src/db/v6_to_v7.rs
@@ -5,7 +5,8 @@ use rocksdb::{ColumnFamilyDescriptor, MergeOperands, Options};
use strum::IntoEnumIterator;

use crate::db::{col_name, rocksdb_column_options, DBError, RocksDB, RocksDBOptions};
use crate::DBCol;
use crate::{DBCol, StoreConfig};
use std::path::Path;

fn refcount_merge_v6(
_new_key: &[u8],
@@ -47,7 +48,7 @@ fn merge_refcounted_records_v6(result: &mut Vec<u8>, val: &[u8]) {
}

fn rocksdb_column_options_v6(col: DBCol) -> Options {
let mut opts = rocksdb_column_options(DBCol::ColDbVersion);
let mut opts = rocksdb_column_options(DBCol::ColDbVersion, &StoreConfig::read_write());

if col == DBCol::ColState {
opts.set_merge_operator("refcount merge", refcount_merge_v6, refcount_merge_v6);
@@ -57,7 +58,7 @@ fn rocksdb_column_options_v6(col: DBCol) -> Options {
}

impl RocksDB {
pub(crate) fn new_v6<P: AsRef<std::path::Path>>(path: P) -> Result<Self, DBError> {
pub(crate) fn new_v6(path: &Path) -> Result<Self, DBError> {
RocksDBOptions::default()
.cf_names(DBCol::iter().map(|col| col_name(col)).collect())
.cf_descriptors(
@@ -67,6 +68,6 @@ impl RocksDB {
})
.collect(),
)
.read_write(path)
.open(path, &StoreConfig::read_write())
}
}
71 changes: 62 additions & 9 deletions core/store/src/lib.rs
@@ -1,3 +1,4 @@
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::{BufReader, BufWriter, Read, Write};
use std::path::Path;
@@ -288,27 +289,79 @@ pub fn read_with_cache<'a, T: BorshDeserialize + 'a>(
Ok(None)
}

#[derive(Default, Debug)]
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct StoreConfig {
/// Attempted writes to the DB will fail. Doesn't require a `LOCK` file.
#[serde(skip)]
pub read_only: bool,

/// Re-export storage layer statistics as prometheus metrics.
/// Minor performance impact is expected.
#[serde(default)]
pub enable_statistics: bool,

/// Maximum number of store files kept open simultaneously.
/// Default value: 512.
/// The underlying storage can require opening a large number of files at once.
/// Increasing this value helps prevent the storage from constantly closing and
/// reopening the files it needs.
/// Raising it above 1024 also requires raising `ulimit -n` on Linux.
#[serde(default = "default_max_open_files")]
pub max_open_files: i32,

/// Cache size for the ColState column.
/// Default value: 512 MiB.
/// Increasing the ColState cache size makes storage more efficient. On the
/// other hand, we don't want to balloon the requirements for running a node,
/// so the default is kept deliberately small.
#[serde(default = "default_col_state_cache_size")]
pub col_state_cache_size: usize,
}

pub fn create_store(path: &Path) -> Store {
create_store_with_config(path, StoreConfig::default())
fn default_max_open_files() -> i32 {
StoreConfig::DEFAULT_MAX_OPEN_FILES
}

pub fn create_store_with_config(path: &Path, store_config: StoreConfig) -> Store {
let mut opts = RocksDBOptions::default();
if store_config.enable_statistics {
opts = opts.enable_statistics();
fn default_col_state_cache_size() -> usize {
StoreConfig::DEFAULT_COL_STATE_CACHE_SIZE
}

impl StoreConfig {
/// This is a value that we've used since 3 Dec 2019.
pub const DEFAULT_MAX_OPEN_FILES: i32 = 512;

/// We used to have the same 32 MB cache size for all columns. When some RocksDB
/// inefficiencies were found, the ColState cache size was increased to 512 MB
/// on Nov 13 2021, and we are considering increasing it further.
pub const DEFAULT_COL_STATE_CACHE_SIZE: usize = 512 * bytesize::MIB as usize;

pub fn read_only() -> StoreConfig {
StoreConfig::read_write().with_read_only(true)
}

let db = if store_config.read_only { opts.read_only(path) } else { opts.read_write(path) }
.expect("Failed to open the database");
pub fn read_write() -> StoreConfig {
StoreConfig {
read_only: false,
enable_statistics: false,
max_open_files: default_max_open_files(),
col_state_cache_size: default_col_state_cache_size(),
}
}

pub fn with_read_only(mut self, read_only: bool) -> Self {
self.read_only = read_only;
self
}
}

pub fn create_store(path: &Path) -> Store {
create_store_with_config(path, &StoreConfig::read_write())
}

pub fn create_store_with_config(path: &Path, store_config: &StoreConfig) -> Store {
let db =
RocksDBOptions::default().open(path, &store_config).expect("Failed to open the database");
Store::new(Arc::new(db))
}

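For callers of the crate, the two constructors now relate like this (a sketch assuming a writable path; the second open only works because the first store is dropped, releasing the `LOCK` file):

```rust
use near_store::{create_store, create_store_with_config, StoreConfig};
use std::path::Path;

fn open_store(path: &Path) {
    // create_store(path) is shorthand for the default read/write config.
    let store = create_store(path);
    drop(store); // release the LOCK before reopening

    // Equivalent explicit form, flipped to read-only via the builder.
    let config = StoreConfig::read_write().with_read_only(true);
    let _ro = create_store_with_config(path, &config);
}
```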
8 changes: 3 additions & 5 deletions nearcore/benches/store.rs
@@ -18,7 +18,7 @@ use std::time::{Duration, Instant};
/// took on avg 6.169248ms op per sec 162 items read 10000
/// took on avg 1.424615ms op per sec 701 items read 10000
/// took on avg 1.416562ms op per sec 705 items read 10000
/// ```
fn read_trie_items(bench: &mut Bencher, shard_id: usize, read_only: bool) {
init_integration_logger();
let home_dir = get_default_home();
@@ -28,10 +28,8 @@ fn read_trie_items(bench: &mut Bencher, shard_id: usize, read_only: bool) {

bench.iter(move || {
tracing::info!(target: "neard", "{:?}", home_dir);
let store = create_store_with_config(
&get_store_path(&home_dir),
StoreConfig { read_only, enable_statistics: false },
);
let store_config = StoreConfig::read_write().with_read_only(read_only);
let store = create_store_with_config(&get_store_path(&home_dir), &store_config);

let mut chain_store =
ChainStore::new(store.clone(), near_config.genesis.config.genesis_height, true);
10 changes: 3 additions & 7 deletions nearcore/src/config.rs
@@ -330,10 +330,6 @@ fn default_use_checkpoints_for_db_migration() -> bool {
true
}

fn default_enable_rocksdb_statistics() -> bool {
false
}

#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Consensus {
/// Minimum number of peers to start syncing.
@@ -457,8 +453,8 @@ pub struct Config {
/// For example, setting "use_db_migration_snapshot" to "/tmp/" will create a directory "/tmp/db_migration_snapshot" and populate it with the database files.
#[serde(skip_serializing_if = "Option::is_none")]
pub db_migration_snapshot_path: Option<PathBuf>,
#[serde(default = "default_enable_rocksdb_statistics")]
pub enable_rocksdb_statistics: bool,
/// Different parameters to configure/optimize underlying storage.
pub store: near_store::StoreConfig,
}

impl Default for Config {
@@ -487,7 +483,7 @@
max_gas_burnt_view: None,
db_migration_snapshot_path: None,
use_db_migration_snapshot: true,
enable_rocksdb_statistics: false,
store: near_store::StoreConfig::read_write(),
}
}
}
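For operators and embedders, the old top-level flag maps onto the nested section; a hypothetical in-code sketch (the `nearcore::config::Config` module path is an assumption):

```rust
// What used to be `enable_rocksdb_statistics: true` in config.json now lives
// on the nested store config, i.e. `"store": { "enable_statistics": true }`.
fn enable_db_stats(config: &mut nearcore::config::Config) {
    config.store = near_store::StoreConfig::read_write();
    config.store.enable_statistics = true;
}
```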