Skip to content

Commit

Permalink
feat: trie cache configuration (#7578)
Browse files Browse the repository at this point in the history
Make the shard cache max total bytes configurable.
Also add separate configuration for view caches.

This deprecates the old format for configuring cache capacity.
The old format will still work for now but values in the new format
will overwrite any values set in the old format.

Example of the new format, that sets all normal caches to 50MB, aurora's shard to 100MB, shard 3 to 3GB, and view caches to 30MB:

```json
{
  "trie_cache": {
    "default_max_bytes": 50000000,
    "per_shard_max_bytes": {
      "s1.v1": 100000000,
      "s3.v1": 3000000000
    }
  },
  "view_trie_cache": {
    "default_max_bytes": 30000000
  }
}
```

resolves #7564
  • Loading branch information
jakmeier authored Oct 28, 2022
1 parent 084ae5a commit 3b9012d
Show file tree
Hide file tree
Showing 8 changed files with 243 additions and 85 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@
a hard limit but instead sets a memory consumption limit. For large trie nodes,
the limits are close to equivalent. For small values, there can now fit more
in the cache than previously.
[#7749](https://github.com/near/nearcore/pull/7749)
* New options `store.trie_cache` and `store.view_trie_cache` in `config.json`
to set limits on the trie cache. Deprecates the never announced
`store.trie_cache_capacities` option which was mentioned in previous change.
[#7578](https://github.com/near/nearcore/pull/7578)
* Tracing of work across actix workers within a process:
[#7866](https://github.com/near/nearcore/pull/7866),
[#7819](https://github.com/near/nearcore/pull/7819),
Expand Down
101 changes: 100 additions & 1 deletion core/primitives/src/shard_layout.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::cmp::Ordering::Greater;
use std::{fmt, str};

use byteorder::{LittleEndian, ReadBytesExt};
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -287,7 +288,7 @@ fn is_top_level_account(top_account: &AccountId, account: &AccountId) -> bool {
}

/// ShardUId is a unique representation for shards from different shard layouts
#[derive(Serialize, Deserialize, Hash, Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Hash, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct ShardUId {
pub version: ShardVersion,
pub shard_id: u32,
Expand Down Expand Up @@ -355,6 +356,104 @@ pub fn get_block_shard_uid_rev(
Ok((block_hash, shard_id))
}

impl fmt::Display for ShardUId {
    /// Writes the shard as `s<shard_id>.v<version>`, for example `s3.v1`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let id = self.shard_id;
        let layout_version = self.version;
        write!(f, "s{id}.v{layout_version}")
    }
}

impl fmt::Debug for ShardUId {
    /// Debug output is intentionally the same text as the `Display` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <Self as fmt::Display>::fmt(self, f)
    }
}

impl str::FromStr for ShardUId {
    type Err = String;

    /// Parses the textual form `s<shard_id>.v<version>`, e.g. `s3.v1`.
    ///
    /// The input is split at the first `.`; the part after it must be `v`
    /// followed by a number and the part before it must be `s` followed by a
    /// number. The version part is validated before the shard id part.
    ///
    /// # Errors
    ///
    /// Returns a human readable message describing the first violated rule.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (shard_str, version_str) = s
            .split_once('.')
            .ok_or_else(|| "shard version and number must be separated by \".\"".to_string())?;

        let version = version_str
            .strip_prefix('v')
            .ok_or_else(|| "shard version must start with \"v\"".to_string())?
            .parse::<ShardVersion>()
            .map_err(|e| format!("shard version after \"v\" must be a number, {e}"))?;

        let shard_id = shard_str
            .strip_prefix('s')
            .ok_or_else(|| "shard id must start with \"s\"".to_string())?
            .parse::<u32>()
            .map_err(|e| format!("shard id after \"s\" must be a number, {e}"))?;

        Ok(ShardUId { shard_id, version })
    }
}

impl<'de> serde::Deserialize<'de> for ShardUId {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
deserializer.deserialize_any(ShardUIdVisitor)
}
}

impl serde::Serialize for ShardUId {
    /// Serializes the shard as a single string, using the same text that the
    /// `Display` implementation of `ShardUId` produces.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // `collect_str` serializes any `Display` value as a string and lets
        // the serializer skip the temporary `String` that `to_string()` needs.
        serializer.collect_str(self)
    }
}

struct ShardUIdVisitor;
impl<'de> serde::de::Visitor<'de> for ShardUIdVisitor {
type Value = ShardUId;

fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
write!(
formatter,
"either string format of `ShardUId` like s0v1 for shard 0 version 1, or a map"
)
}

fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
v.parse().map_err(|e| E::custom(e))
}

fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
where
A: serde::de::MapAccess<'de>,
{
// custom struct deserialization for backwards compatibility
// TODO(#7894): consider removing this code after checking
// `ShardUId` is nowhere serialized in the old format
let mut version = None;
let mut shard_id = None;

while let Some((field, value)) = map.next_entry()? {
match field {
"version" => version = Some(value),
"shard_id" => shard_id = Some(value),
_ => return Err(serde::de::Error::unknown_field(field, &["version", "shard_id"])),
}
}

match (version, shard_id) {
(None, _) => Err(serde::de::Error::missing_field("version")),
(_, None) => Err(serde::de::Error::missing_field("shard_id")),
(Some(version), Some(shard_id)) => Ok(ShardUId { version, shard_id }),
}
}
}

#[cfg(test)]
mod tests {
use crate::shard_layout::{account_id_to_shard_id, ShardLayout, ShardUId};
Expand Down
52 changes: 47 additions & 5 deletions core/store/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use near_primitives::shard_layout::ShardUId;
use std::{collections::HashMap, iter::FromIterator};

use crate::trie::DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT;

#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
#[serde(default)]
Expand Down Expand Up @@ -37,12 +40,15 @@ pub struct StoreConfig {
/// the performance of the storage
pub block_size: bytesize::ByteSize,

/// Trie cache capacities
/// Default value: ShardUId {version: 1, shard_id: 3} -> 45_000_000
/// We're still experimenting with this parameter and it seems decreasing its value can improve
/// the performance of the storage
/// DEPRECATED: use `trie_cache` instead.
/// TODO(#7894): Remove in version >1.31
pub trie_cache_capacities: Vec<(ShardUId, u64)>,

/// Trie cache configuration per shard for normal (non-view) caches.
pub trie_cache: TrieCacheConfig,
/// Trie cache configuration per shard for view caches.
pub view_trie_cache: TrieCacheConfig,

/// Enable fetching account and access key data ahead of time to avoid IO latency.
pub enable_receipt_prefetching: bool,

Expand Down Expand Up @@ -171,7 +177,22 @@ impl Default for StoreConfig {
// we use it since then.
block_size: bytesize::ByteSize::kib(16),

trie_cache_capacities: vec![(ShardUId { version: 1, shard_id: 3 }, 45_000_000)],
// deprecated
trie_cache_capacities: vec![],

trie_cache: TrieCacheConfig {
default_max_bytes: DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT,
// Temporary solution to make contracts with heavy trie access
// patterns on shard 3 more stable. It was chosen by the estimation
// of the largest contract storage size we are aware as of 23/08/2022.
// Consider removing after implementing flat storage. (#7327)
per_shard_max_bytes: HashMap::from_iter([(
ShardUId { version: 1, shard_id: 3 },
3_000_000_000,
)]),
},
view_trie_cache: TrieCacheConfig::default(),

enable_receipt_prefetching: true,
sweat_prefetch_receivers: vec![
"token.sweat".to_owned(),
Expand Down Expand Up @@ -222,3 +243,24 @@ impl Default for MigrationSnapshot {
Self::Enabled(true)
}
}

/// Memory limits for trie caches: one default limit plus optional per-shard
/// overrides.
///
/// Because of `#[serde(default)]`, fields that are missing in the serialized
/// form are filled in from the `Default` implementation of this struct.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct TrieCacheConfig {
/// Limit the memory consumption of the trie cache per shard.
///
/// This is an approximate limit that attempts to factor in data structure
/// overhead also. It is supposed to be fairly accurate in the limit.
pub default_max_bytes: u64,
/// Overrides `default_max_bytes` for specific shards.
///
/// Keys are `ShardUId`s; values are limits in bytes.
pub per_shard_max_bytes: HashMap<ShardUId, u64>,
}

impl Default for TrieCacheConfig {
fn default() -> Self {
Self {
default_max_bytes: DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT,
per_shard_max_bytes: Default::default(),
}
}
}
116 changes: 42 additions & 74 deletions core/store/src/trie/config.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,15 @@
use crate::config::TrieCacheConfig;
use crate::trie::trie_storage::TrieCacheInner;
use crate::StoreConfig;
use near_primitives::shard_layout::ShardUId;
use near_primitives::types::AccountId;
use std::collections::HashMap;
use std::str::FromStr;
use tracing::error;
use tracing::{error, warn};

/// Default number of cache entries.
/// It was chosen to fit into RAM well. RAM spend on trie cache should not exceed 50_000 * 4 (number of shards) *
/// TRIE_LIMIT_CACHED_VALUE_SIZE * 2 (number of caches - for regular and view client) = 0.4 GB.
/// In our tests on a single shard, it barely occupied 40 MB, which is dominated by state cache size
/// with 512 MB limit. The total RAM usage for a single shard was 1 GB.
const TRIE_DEFAULT_SHARD_CACHE_SIZE: u64 = if cfg!(feature = "no_cache") { 1 } else { 50000 };

/// Default total size of values which may simultaneously exist the cache.
/// It is chosen by the estimation of the largest contract storage size we are aware as of 23/08/2022.
const DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT: u64 =
if cfg!(feature = "no_cache") { 1 } else { 3_000_000_000 };
/// Default memory limit, if nothing else is configured.
/// It is chosen to correspond roughly to the old limit, which was
/// 50k entries * TRIE_LIMIT_CACHED_VALUE_SIZE.
///
/// When the crate is built with the `no_cache` feature, the limit is 1
/// instead of 50_000_000.
pub(crate) const DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT: u64 =
if cfg!(feature = "no_cache") { 1 } else { 50_000_000 };

/// Capacity for the deletions queue.
/// It is chosen to fit all hashes of deleted nodes for 3 completely full blocks.
Expand All @@ -30,8 +23,8 @@ const TRIE_LIMIT_CACHED_VALUE_SIZE: usize = 1000;
/// Stores necessary configuration for the creation of tries.
#[derive(Default)]
pub struct TrieConfig {
pub shard_cache_config: ShardCacheConfig,
pub view_shard_cache_config: ShardCacheConfig,
pub shard_cache_config: TrieCacheConfig,
pub view_shard_cache_config: TrieCacheConfig,
pub enable_receipt_prefetching: bool,

/// Configured accounts will be prefetched as SWEAT token account, if predecessor is listed as sender.
Expand All @@ -40,23 +33,22 @@ pub struct TrieConfig {
pub sweat_prefetch_senders: Vec<AccountId>,
}

pub struct ShardCacheConfig {
/// Shard cache capacity in number of trie nodes.
pub default_max_entries: u64,
/// Limits the memory consumption for the cache.
pub default_max_total_bytes: u64,
/// Overrides `default_max_entries` per shard.
pub override_max_entries: HashMap<ShardUId, u64>,
/// Overrides `default_max_total_bytes` per shard.
pub override_max_total_bytes: HashMap<ShardUId, u64>,
}

impl TrieConfig {
pub fn from_config(config: &StoreConfig) -> Self {
let mut this = Self::default();
this.shard_cache_config
.override_max_entries
.extend(config.trie_cache_capacities.iter().cloned());
/// Create a new `TrieConfig` with default values or the values specified in `StoreConfig`.
pub fn from_store_config(config: &StoreConfig) -> Self {
let mut this = TrieConfig::default();

if !config.trie_cache_capacities.is_empty() {
warn!(target: "store", "`trie_cache_capacities` is deprecated, use `trie_cache` and `view_trie_cache` instead");
for (shard_uid, capacity) in &config.trie_cache_capacities {
let bytes_limit = Self::deprecated_num_entry_to_memory_limit(*capacity);
this.shard_cache_config.per_shard_max_bytes.insert(*shard_uid, bytes_limit);
}
}

this.shard_cache_config = config.trie_cache.clone();
this.view_shard_cache_config = config.view_trie_cache.clone();

this.enable_receipt_prefetching = config.enable_receipt_prefetching;
for account in &config.sweat_prefetch_receivers {
match AccountId::from_str(account) {
Expand All @@ -70,13 +62,8 @@ impl TrieConfig {
Err(e) => error!(target: "config", "invalid account id {account}: {e}"),
}
}
this
}

/// Shard cache capacity in total bytes.
pub fn shard_cache_total_size_limit(&self, shard_uid: ShardUId, is_view: bool) -> u64 {
if is_view { &self.view_shard_cache_config } else { &self.shard_cache_config }
.total_size_limit(shard_uid)
this
}

/// Size limit in bytes per single value for caching in shard caches.
Expand All @@ -94,42 +81,23 @@ impl TrieConfig {
pub fn deletions_queue_capacity(&self) -> usize {
DEFAULT_SHARD_CACHE_DELETIONS_QUEUE_CAPACITY
}
}

impl ShardCacheConfig {
// TODO(#7894): Remove this when `trie_cache_capacities` is removed from config.
fn capacity(&self, shard_uid: ShardUId) -> u64 {
self.override_max_entries.get(&shard_uid).cloned().unwrap_or(self.default_max_entries)
}

fn total_size_limit(&self, shard_uid: ShardUId) -> u64 {
let explicit_limit = self
.override_max_total_bytes
.get(&shard_uid)
.copied()
.unwrap_or(self.default_max_total_bytes);
// As long as `trie_cache_capacities` is a config option, it should be respected.
// We no longer commit to a hard limit on this. But we make sure that the old
// worst-case assumption of how much memory would be consumed still works.
// Specifically, the old calculation ignored `PER_ENTRY_OVERHEAD` and used
// `max_cached_value_size()` only to figure out a good value for how many
// nodes we want in the cache at most.
// This implicit limit should result in the same may number of nodes and same max memory
// consumption as the old config.
// TODO(#7894): Remove this when `trie_cache_capacities` is removed from config.
let implicit_limit = self.capacity(shard_uid)
* (TrieCacheInner::PER_ENTRY_OVERHEAD + TrieConfig::max_cached_value_size() as u64);
explicit_limit.min(implicit_limit)
}
}

impl Default for ShardCacheConfig {
fn default() -> Self {
Self {
default_max_entries: TRIE_DEFAULT_SHARD_CACHE_SIZE,
default_max_total_bytes: DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT,
override_max_entries: HashMap::default(),
override_max_total_bytes: HashMap::default(),
}
/// Given a number of max entries in the old config format, calculate how
/// many bytes the limit should be set to such that AT LEAST THE SAME NUMBER
/// can fit.
///
/// TODO(#7894): Remove this when `trie_cache_capacities` is removed from config.
///
/// As long as `trie_cache_capacities` is a config option, it should be respected.
/// We no longer commit to a hard limit on this. But we make sure that the old
/// worst-case assumption of how much memory would be consumed still works.
/// Specifically, the old calculation ignored `PER_ENTRY_OVERHEAD` and used
/// `max_cached_value_size()` only to figure out a good value for how many
/// nodes we want in the cache at most.
/// This implicit limit should result in the same min number of nodes and
/// same max memory consumption as the old config.
///
/// The result saturates at `u64::MAX` instead of overflowing, so an absurdly
/// large configured entry count cannot wrap around into a tiny limit.
pub(crate) fn deprecated_num_entry_to_memory_limit(max_num_entries: u64) -> u64 {
    // Worst-case bytes charged for one cached entry: the bookkeeping overhead
    // plus the largest value that is allowed into the cache.
    let bytes_per_entry =
        TrieCacheInner::PER_ENTRY_OVERHEAD + TrieConfig::max_cached_value_size() as u64;
    max_num_entries.saturating_mul(bytes_per_entry)
}
}
1 change: 1 addition & 0 deletions core/store/src/trie/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use near_primitives::types::{StateRoot, StateRootNode};

use crate::flat_state::FlatState;
pub use crate::trie::config::TrieConfig;
pub(crate) use crate::trie::config::DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT;
use crate::trie::insert_delete::NodesStorage;
use crate::trie::iterator::TrieIterator;
pub use crate::trie::nibble_slice::NibbleSlice;
Expand Down
Loading

0 comments on commit 3b9012d

Please sign in to comment.