diff --git a/near-sdk/src/store/free_list/mod.rs b/near-sdk/src/store/free_list/mod.rs
index 00ad4a908..7d551d3ab 100644
--- a/near-sdk/src/store/free_list/mod.rs
+++ b/near-sdk/src/store/free_list/mod.rs
@@ -223,6 +223,92 @@ where
     pub fn drain(&mut self) -> Drain<T> {
         Drain::new(self)
     }
+
+    /// Empty slots at the front of the list are swapped with occupied slots at the back of the
+    /// list. Defrag helps reduce gas costs in scenarios where many elements at the front of the
+    /// list are removed without being replaced. Please see https://github.com/near/near-sdk-rs/issues/990
+    pub(crate) fn defrag<F>(&mut self, callback: F)
+    where
+        F: FnMut(&T, u32),
+    {
+        Defrag::new(self).defrag(callback);
+        self.first_free = None;
+    }
+}
+
+/// Helper struct that performs the defragmentation of a `FreeList`. See the
+/// documentation of [`FreeList::defrag`] for more details.
+struct Defrag<'a, T>
+where
+    T: BorshSerialize + BorshDeserialize,
+{
+    elements: &'a mut Vector<Slot<T>>,
+    occupied_count: u32,
+    curr_free_slot: Option<FreeListIndex>,
+    defrag_index: u32,
+}
+
+impl<'a, T> Defrag<'a, T>
+where
+    T: BorshSerialize + BorshDeserialize,
+{
+    /// Create a new struct for defragmenting `FreeList`.
+    fn new(list: &'a mut FreeList<T>) -> Self {
+        Self {
+            elements: &mut list.elements,
+            occupied_count: list.occupied_count,
+            defrag_index: list.occupied_count,
+            curr_free_slot: list.first_free,
+        }
+    }
+
+    fn defrag<F>(&mut self, mut callback: F)
+    where
+        F: FnMut(&T, u32),
+    {
+        while let Some(curr_free_index) = self.next_free_slot() {
+            if let Some((value, occupied_index)) = self.next_occupied() {
+                callback(value, curr_free_index.0);
+                // The slot at curr_free_index.0 is empty at this point, so the swap
+                // moves the occupied value forward and leaves `occupied_index` empty.
+                self.elements.swap(curr_free_index.0, occupied_index);
+            } else {
+                // Could not find an occupied slot to fill the free slot
+                env::panic_str(ERR_INCONSISTENT_STATE)
+            }
+        }
+
+        // After defragmenting, all slots from `occupied_count` onward should be `Slot::Empty`.
+        self.elements.drain(self.occupied_count..);
+    }
+
+    fn next_free_slot(&mut self) -> Option<FreeListIndex> {
+        while let Some(curr_free_index) = self.curr_free_slot {
+            let curr_slot = self.elements.get(curr_free_index.0);
+            self.curr_free_slot = match curr_slot {
+                Some(Slot::Empty { next_free }) => *next_free,
+                Some(Slot::Occupied(_)) => {
+                    // The free-list chain should never point at an occupied slot
+                    env::panic_str(ERR_INCONSISTENT_STATE)
+                }
+                _ => None,
+            };
+            if curr_free_index.0 < self.occupied_count {
+                return Some(curr_free_index);
+            }
+        }
+        None
+    }
+
+    fn next_occupied(&mut self) -> Option<(&T, u32)> {
+        while self.defrag_index < self.elements.len() {
+            if let Some(Slot::Occupied(value)) = self.elements.get(self.defrag_index) {
+                return Some((value, self.defrag_index));
+            }
+            self.defrag_index += 1;
+        }
+        None
+    }
 }
 
 #[cfg(not(target_arch = "wasm32"))]
@@ -236,6 +322,25 @@ mod tests {
     use super::*;
     use crate::test_utils::test_env::setup_free;
 
+    #[test]
+    fn new_bucket_is_empty() {
+        let bucket: FreeList<u8> = FreeList::new(b"b");
+        assert!(bucket.is_empty());
+    }
+
+    #[test]
+    fn occupied_count_gets_updated() {
+        let mut bucket = FreeList::new(b"b");
+        let indices: Vec<_> = (0..5).map(|i| bucket.insert(i)).collect();
+
+        assert_eq!(bucket.occupied_count, 5);
+
+        bucket.remove(indices[1]);
+        bucket.remove(indices[3]);
+
+        assert_eq!(bucket.occupied_count, 3);
+    }
+
     #[test]
     fn basic_functionality() {
         let mut bucket = FreeList::new(b"b");
@@ -252,6 +357,32 @@ mod tests {
         assert_eq!(bucket.get(i3), Some(&4));
     }
 
+    #[test]
+    fn defrag() {
+        let mut bucket = FreeList::new(b"b");
+        let indices: Vec<_> = (0..8).map(|i| bucket.insert(i)).collect();
+
+        // Slot states after removals: Empty, Empty, Empty, Empty, Occupied, Empty, Occupied, Empty
+        bucket.remove(indices[1]);
+        bucket.remove(indices[3]);
+        bucket.remove(indices[0]);
+        bucket.remove(indices[5]);
+        bucket.remove(indices[2]);
+        bucket.remove(indices[7]);
+
+        // 4 should move to index 0, 6 should move to index 1
+        bucket.defrag(|_, _| {});
+
+        // Check that no free slots remain after defrag
+        assert_eq!(bucket.occupied_count, bucket.len());
+
+        assert_eq!(*bucket.get(indices[0]).unwrap(), 4u8);
+        assert_eq!(*bucket.get(indices[1]).unwrap(), 6u8);
+        for i in indices[2..].iter() {
+            assert_eq!(bucket.get(*i), None);
+        }
+    }
+
     #[test]
     fn bucket_iterator() {
         let mut bucket = FreeList::new(b"b");
diff --git a/near-sdk/src/store/mod.rs b/near-sdk/src/store/mod.rs
index 1c0fb3eb2..5a3c494ec 100644
--- a/near-sdk/src/store/mod.rs
+++ b/near-sdk/src/store/mod.rs
@@ -39,7 +39,7 @@
 //! - [`UnorderedMap`]: Storage version of [`std::collections::HashMap`]. No ordering
 //!   guarantees.
 //!
-//! - [`TreeMap`] (`unstable`): Storage version of [`std::collections::BTreeMap`]. Ordered by key,
+//! - [`TreeMap`](TreeMap) (`unstable`): Storage version of [`std::collections::BTreeMap`]. Ordered by key,
 //!   which comes at the cost of more expensive lookups and iteration.
 //!
 //! Sets:
diff --git a/near-sdk/src/store/unordered_map/mod.rs b/near-sdk/src/store/unordered_map/mod.rs
index 68b2ddff0..426ff02ac 100644
--- a/near-sdk/src/store/unordered_map/mod.rs
+++ b/near-sdk/src/store/unordered_map/mod.rs
@@ -535,6 +535,16 @@ where
     /// [`BorshSerialize`] and [`ToOwned`](ToOwned) on the borrowed form *must* match
     /// those for the key type.
     ///
+    /// # Performance
+    ///
+    /// When elements are removed, the underlying vector of keys isn't
+    /// rearranged; instead, the removed key is replaced with a placeholder value. These
+    /// empty slots are reused on subsequent [`insert`](Self::insert) operations.
+    ///
+    /// In cases where there are a lot of removals and not a lot of insertions, these leftover
+    /// placeholders might make iteration more costly, driving up gas costs. If you need to
+    /// remedy this, take a look at [`defrag`](Self::defrag).
+    ///
     /// # Examples
     ///
     /// ```
@@ -563,6 +573,16 @@ where
     /// [`BorshSerialize`] and [`ToOwned`](ToOwned) on the borrowed form *must* match
     /// those for the key type.
     ///
+    /// # Performance
+    ///
+    /// When elements are removed, the underlying vector of keys isn't
+    /// rearranged; instead, the removed key is replaced with a placeholder value. These
+    /// empty slots are reused on subsequent [`insert`](Self::insert) operations.
+    ///
+    /// In cases where there are a lot of removals and not a lot of insertions, these leftover
+    /// placeholders might make iteration more costly, driving up gas costs. If you need to
+    /// remedy this, take a look at [`defrag`](Self::defrag).
+    ///
     /// # Examples
     ///
     /// ```
@@ -630,6 +650,47 @@ where
     }
 }
 
+impl<K, V, H> UnorderedMap<K, V, H>
+where
+    K: BorshSerialize + BorshDeserialize + Ord + Clone,
+    V: BorshSerialize + BorshDeserialize,
+    H: ToKey,
+{
+    /// Remove empty placeholders left over from calling [`remove`](Self::remove).
+    ///
+    /// When elements are removed using [`remove`](Self::remove), the underlying vector isn't
+    /// rearranged; instead, the removed element is replaced with a placeholder value. These
+    /// empty slots are reused on subsequent [`insert`](Self::insert) operations.
+    ///
+    /// In cases where there are a lot of removals and not a lot of insertions, these leftover
+    /// placeholders might make iteration more costly, driving up gas costs. This method
+    /// remedies that by removing all empty slots from the underlying vector and compacting it.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use near_sdk::store::UnorderedMap;
+    ///
+    /// let mut map = UnorderedMap::new(b"b");
+    ///
+    /// for i in 0..4 {
+    ///     map.insert(i, i);
+    /// }
+    ///
+    /// map.remove(&1);
+    /// map.remove(&3);
+    ///
+    /// map.defrag();
+    /// ```
+    pub fn defrag(&mut self) {
+        self.keys.defrag(|key, new_index| {
+            if let Some(existing) = self.values.get_mut(key) {
+                existing.key_index = FreeListIndex(new_index);
+            }
+        });
+    }
+}
+
 #[cfg(not(target_arch = "wasm32"))]
 #[cfg(test)]
 mod tests {
@@ -756,4 +817,37 @@ mod tests {
             }
         }
     }
+
+    #[test]
+    fn defrag() {
+        let mut map = UnorderedMap::new(b"b");
+
+        let all_indices = 0..=8;
+
+        for i in all_indices {
+            map.insert(i, i);
+        }
+
+        let removed = [2, 4, 6];
+        let existing = [0, 1, 3, 5, 7, 8];
+
+        for id in removed {
+            map.remove(&id);
+        }
+
+        map.defrag();
+
+        for i in removed {
+            assert_eq!(map.get(&i), None);
+        }
+        for i in existing {
+            assert_eq!(map.get(&i), Some(&i));
+        }
+
+        // Check elements that moved during defragmentation, plus a couple that didn't
+        assert_eq!(map.remove_entry(&7).unwrap(), (7, 7));
+        assert_eq!(map.remove_entry(&8).unwrap(), (8, 8));
+        assert_eq!(map.remove_entry(&1).unwrap(), (1, 1));
+        assert_eq!(map.remove_entry(&3).unwrap(), (3, 3));
+    }
 }
diff --git a/near-sdk/src/store/unordered_set/iter.rs b/near-sdk/src/store/unordered_set/iter.rs
index 47dad9f05..321df6d96 100644
--- a/near-sdk/src/store/unordered_set/iter.rs
+++ b/near-sdk/src/store/unordered_set/iter.rs
@@ -190,12 +190,12 @@ where
 {
 }
 
-/// A lazy iterator producing elements in the symmetrical difference of `UnorderedSet`s.
/// -/// This `struct` is created by the [`symmetrical_difference`] method on [`UnorderedSet`]. +/// This `struct` is created by the [`symmetric_difference`] method on [`UnorderedSet`]. /// See its documentation for more. /// -/// [`symmetrical_difference`]: UnorderedSet::symmetrical_difference +/// [`symmetric_difference`]: UnorderedSet::symmetric_difference pub struct SymmetricDifference<'a, T, H> where T: BorshSerialize + Ord + BorshDeserialize, @@ -284,7 +284,7 @@ where { } -/// A draining iterator for [`UnorderedMap`]. +/// A draining iterator for [`UnorderedSet`]. /// /// This `struct` is created by the [`drain`] method on [`UnorderedSet`]. /// See its documentation for more. diff --git a/near-sdk/src/store/vec/mod.rs b/near-sdk/src/store/vec/mod.rs index 3c2899a7d..4362c19f7 100644 --- a/near-sdk/src/store/vec/mod.rs +++ b/near-sdk/src/store/vec/mod.rs @@ -349,7 +349,7 @@ where self.values.get_mut(index) } - fn swap(&mut self, a: u32, b: u32) { + pub(crate) fn swap(&mut self, a: u32, b: u32) { if a >= self.len() || b >= self.len() { env::panic_str(ERR_INDEX_OUT_OF_BOUNDS); }