fix: record nodes for writes in memtrie #10841

Merged · 7 commits · Mar 25, 2024
Changes from 3 commits
4 changes: 2 additions & 2 deletions core/store/src/trie/mem/mod.rs
@@ -143,7 +143,7 @@ impl MemTries {
pub fn update(
&self,
root: CryptoHash,
track_disk_changes: bool,
track_trie_changes: bool,
) -> Result<MemTrieUpdate, StorageError> {
let root_id = if root == CryptoHash::default() {
None
@@ -163,7 +163,7 @@ impl MemTries {
root_id,
&self.arena.memory(),
self.shard_uid.to_string(),
track_disk_changes,
track_trie_changes,
))
}
}
103 changes: 72 additions & 31 deletions core/store/src/trie/mem/updating.rs
@@ -8,7 +8,8 @@ use crate::{NibbleSlice, RawTrieNode, RawTrieNodeWithSize, TrieChanges};
use near_primitives::hash::{hash, CryptoHash};
use near_primitives::state::FlatStateValue;
use near_primitives::types::BlockHeight;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;

/// An old node means a node in the current in-memory trie. An updated node means a
/// node we're going to store in the in-memory trie but have not constructed there yet.
@@ -43,6 +44,28 @@ pub enum UpdatedMemTrieNode {
},
}

/// Keeps values and internal nodes accessed on updating memtrie.
pub(crate) struct TrieAccesses {
/// Hashes and encoded trie nodes.
pub nodes: HashMap<CryptoHash, Arc<[u8]>>,
/// Hashes of accessed values - because values themselves are not
/// necessarily present in memtrie.
pub values: HashSet<CryptoHash>,
Contributor: Should this have an optional value, if the value is present in memtrie?

}
Contributor: I don't fully understand what additional nodes this change records.

AFAIU the runtime updates the trie by calling storage_write, which always does a trie read before writing a value:

    let evicted_ptr = self.ext.storage_get(&key, StorageGetMode::Trie)?;

This trie read records all nodes that were accessed to reach this value, even in the case of memtries:

    if let Some(recorder) = &self.recorder {

Doesn't that record everything that is needed to prove execution of the contract? Why do we need an additional access log?

Contributor (@jancionear, Mar 21, 2024): Ah, I guess it doesn't record nodes that are created when adding new values...

Member Author: And this path is called only during contract execution. The gateway for updating the trie outside of it is set<T: BorshSerialize>(state_update: &mut TrieUpdate, key: TrieKey, value: &T).
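To illustrate the distinction discussed above, here is a minimal, self-contained sketch (the ToyStore type and its methods are toy stand-ins, not the nearcore API) of why a recorder hooked only into the read path captures read-then-write keys but misses keys that go through a write-only path:

```rust
use std::collections::{HashMap, HashSet};

struct ToyStore {
    data: HashMap<String, Vec<u8>>,
    recorded_reads: HashSet<String>,
}

impl ToyStore {
    /// Read path: everything looked up here ends up in the recording,
    /// analogous to the recorder being fed on every get.
    fn get(&mut self, key: &str) -> Option<Vec<u8>> {
        self.recorded_reads.insert(key.to_string());
        self.data.get(key).cloned()
    }

    /// Write-only path (think of a set-style update outside contract
    /// execution): nothing is recorded here.
    fn set(&mut self, key: &str, value: Vec<u8>) {
        self.data.insert(key.to_string(), value);
    }
}

fn main() {
    let mut store = ToyStore { data: HashMap::new(), recorded_reads: HashSet::new() };

    // storage_write-style update: a read happens first, so the key is recorded.
    let _prev = store.get("read_then_written");
    store.set("read_then_written", vec![1]);

    // Write-only update: never read, so nothing about it is recorded.
    store.set("written_only", vec![2]);

    assert!(store.recorded_reads.contains("read_then_written"));
    assert!(!store.recorded_reads.contains("written_only"));
}
```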


/// Tracks intermediate trie changes, final version of which is to be committed
/// to disk after finishing trie update.
struct TrieChangesTracker {
/// Changes of reference count on disk for each impacted node.
refcount_changes: TrieRefcountDeltaMap,
/// All observed values and internal nodes.
/// Needed to prepare recorded storage.
/// Note that negative `refcount_changes` does not fully cover it, as node
/// or value of the same hash can be removed and inserted for the same
/// update in different parts of trie!
accesses: TrieAccesses,
}

/// Structure to build an update to the in-memory trie.
pub struct MemTrieUpdate<'a> {
/// The original root before updates. It is None iff the original trie had no keys.
@@ -53,8 +76,9 @@ pub struct MemTrieUpdate<'a> {
/// (1) temporarily we take out the node from the slot to process it and put it back
/// later; or (2) the node is deleted afterwards.
pub updated_nodes: Vec<Option<UpdatedMemTrieNode>>,
/// Refcount changes to on-disk trie nodes.
pub trie_refcount_changes: Option<TrieRefcountDeltaMap>,
/// Tracks trie changes necessary to make on-disk updates and recorded
/// storage.
tracked_trie_changes: Option<TrieChangesTracker>,
}

impl UpdatedMemTrieNode {
@@ -97,15 +121,18 @@ impl<'a> MemTrieUpdate<'a> {
root: Option<MemTrieNodeId>,
arena: &'a ArenaMemory,
shard_uid: String,
track_disk_changes: bool,
track_trie_changes: bool,
) -> Self {
let mut trie_update = Self {
root,
arena,
shard_uid,
updated_nodes: vec![],
trie_refcount_changes: if track_disk_changes {
Some(TrieRefcountDeltaMap::new())
tracked_trie_changes: if track_trie_changes {
Some(TrieChangesTracker {
refcount_changes: TrieRefcountDeltaMap::new(),
accesses: TrieAccesses { nodes: HashMap::new(), values: HashSet::new() },
})
} else {
None
},
@@ -145,8 +172,16 @@ impl<'a> MemTrieUpdate<'a> {
match node {
None => self.new_updated_node(UpdatedMemTrieNode::Empty),
Some(node) => {
if let Some(trie_refcount_changes) = self.trie_refcount_changes.as_mut() {
trie_refcount_changes.subtract(node.as_ptr(self.arena).view().node_hash(), 1);
if let Some(tracked_trie_changes) = self.tracked_trie_changes.as_mut() {
let node_view = node.as_ptr(self.arena).view();
let node_hash = node_view.node_hash();
let raw_node_serialized =
borsh::to_vec(&node_view.to_raw_trie_node_with_size()).unwrap();
tracked_trie_changes
.accesses
.nodes
.insert(node_hash, raw_node_serialized.into());
tracked_trie_changes.refcount_changes.subtract(node_hash, 1);
}
self.new_updated_node(UpdatedMemTrieNode::from_existing_node_view(
node.as_ptr(self.arena).view(),
@@ -164,14 +199,15 @@
}

fn add_refcount_to_value(&mut self, hash: CryptoHash, value: Option<Vec<u8>>) {
if let Some(trie_refcount_changes) = self.trie_refcount_changes.as_mut() {
trie_refcount_changes.add(hash, value.unwrap(), 1);
if let Some(tracked_node_changes) = self.tracked_trie_changes.as_mut() {
tracked_node_changes.refcount_changes.add(hash, value.unwrap(), 1);
}
}

fn subtract_refcount_for_value(&mut self, hash: CryptoHash) {
if let Some(trie_refcount_changes) = self.trie_refcount_changes.as_mut() {
trie_refcount_changes.subtract(hash, 1);
if let Some(tracked_node_changes) = self.tracked_trie_changes.as_mut() {
tracked_node_changes.accesses.values.insert(hash);
Contributor: Similar to the comment below: does subtracting the refcount for the value guarantee that we must need the previous value? If so, why?

tracked_node_changes.refcount_changes.subtract(hash, 1);
}
}

@@ -779,31 +815,36 @@ impl<'a> MemTrieUpdate<'a> {
}

/// Converts the updates to trie changes as well as memtrie changes.
pub fn to_trie_changes(self) -> TrieChanges {
let Self { root, arena, shard_uid, trie_refcount_changes, updated_nodes } = self;
let mut trie_refcount_changes =
trie_refcount_changes.expect("Cannot to_trie_changes for memtrie changes only");
pub(crate) fn to_trie_changes(self) -> (TrieChanges, TrieAccesses) {
let Self { root, arena, shard_uid, tracked_trie_changes, updated_nodes } = self;
let TrieChangesTracker { mut refcount_changes, accesses } =
tracked_trie_changes.expect("Cannot to_trie_changes for memtrie changes only");
let (mem_trie_changes, hashes_and_serialized) =
Self::to_mem_trie_changes_internal(shard_uid, arena, updated_nodes);

// We've accounted for the dereferenced nodes, as well as value addition/subtractions.
// The only thing left is to increment refcount for all new nodes.
for (node_hash, node_serialized) in hashes_and_serialized {
trie_refcount_changes.add(node_hash, node_serialized, 1);
}
let (insertions, deletions) = trie_refcount_changes.into_changes();

TrieChanges {
old_root: root.map(|root| root.as_ptr(arena).view().node_hash()).unwrap_or_default(),
new_root: mem_trie_changes
.node_ids_with_hashes
.last()
.map(|(_, hash)| *hash)
.unwrap_or_default(),
insertions,
deletions,
mem_trie_changes: Some(mem_trie_changes),
refcount_changes.add(node_hash, node_serialized, 1);
}
let (insertions, deletions) = refcount_changes.into_changes();

(
TrieChanges {
old_root: root
.map(|root| root.as_ptr(arena).view().node_hash())
.unwrap_or_default(),
new_root: mem_trie_changes
.node_ids_with_hashes
.last()
.map(|(_, hash)| *hash)
.unwrap_or_default(),
insertions,
deletions,
mem_trie_changes: Some(mem_trie_changes),
},
accesses,
)
}
}

@@ -917,7 +958,7 @@ mod tests {
update.delete(&key);
}
}
update.to_trie_changes()
update.to_trie_changes().0
}

fn make_memtrie_changes_only(
31 changes: 30 additions & 1 deletion core/store/src/trie/mod.rs
@@ -1498,7 +1498,36 @@ impl Trie {
None => trie_update.delete(&key),
}
}
Ok(trie_update.to_trie_changes())
let (trie_changes, trie_accesses) = trie_update.to_trie_changes();

// Sanity check for tests: all modified trie items must be
// present in ever accessed trie items.
#[cfg(test)]
{
for t in trie_changes.deletions.iter() {
let hash = t.trie_node_or_value_hash;
assert!(
trie_accesses.values.contains(&hash)
|| trie_accesses.nodes.contains_key(&hash),
"Hash {} is not present in trie accesses",
hash
);
}
}

// Retroactively record all accessed trie items to account for
// key-value pairs which were only written but never read, thus
Contributor: There's a gap in my logical understanding: where is the link that goes from "a key is written to but not read from" to "we must have the previous value for that key"?

For the specific trie-restructuring case I knew of (converting a branch with one child into an extension), I know the child node must be read in order to do the conversion, and at the same time the child node is deleted. So in that case, yes, the child is written to (its refcount is decremented) but not read from (it is never queried), and its previous value is needed (to carry out the restructuring). But is that, in general, the only case where the update phase would cause a value to be written to but not read from?

In other words, is the logic: "if a node is written to but not read from, then that node must have been replaced during a trie restructuring, and therefore its previous value must be known" (which I would need to be convinced of), or is there some other way to draw this conclusion?

Member Author (@Longarithm, Mar 20, 2024):

> if a node is written to but not read from, then that node must have been replaced during a trie restructuring, and therefore its previous value must be known

I think "key" got replaced with "node" somewhere in the message.

The logic is: "if a key is written to but not read from" => "all nodes on the trie path must be known". Let's say you don't know some node on the path to it, and take the lowest such node. Then you can't recompute it after the write, because you don't know the hashes of the key's neighbours, so you can't compute the new state root.

And, yes, when we descend into the node corresponding to a key, all refcounts on the way are decremented.

What I actually want to propose with this PR is a simpler rule: "for all keys for which any state operation was called (get/has/write/remove), all nodes on the old path must be recorded". That's what the current disk-trie logic does, and it's easy to explain and follow.
For memtries, the union of (nodes recorded on chunk application) and (nodes recorded on trie restructuring) should give this set. So I didn't think too hard about whether this is strictly necessary; it's enough for me that it is verifiable.
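To make the argument about the lowest unknown node concrete, here is a small self-contained sketch. It uses a simplified binary Merkle tree with std's DefaultHasher standing in for a cryptographic hash (this is not the actual NEAR trie layout): recomputing the root after changing one leaf requires the hash of every sibling along the old path from that leaf to the root, which is why every node on the path must be known.

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

fn h1(data: &[u8]) -> u64 {
    let mut s = DefaultHasher::new();
    data.hash(&mut s);
    s.finish()
}

fn h2(left: u64, right: u64) -> u64 {
    let mut s = DefaultHasher::new();
    left.hash(&mut s);
    right.hash(&mut s);
    s.finish()
}

/// Recompute the root given a new leaf value and the sibling hashes along the
/// path, ordered from the leaf up; `true` means the sibling is on the left.
fn root_after_write(new_leaf: &[u8], path_siblings: &[(u64, bool)]) -> u64 {
    let mut acc = h1(new_leaf);
    for (sibling, sibling_is_left) in path_siblings {
        acc = if *sibling_is_left { h2(*sibling, acc) } else { h2(acc, *sibling) };
    }
    acc
}

fn main() {
    // Four leaves a, b, c, d; we overwrite c.
    let (a, b, c, d) = (h1(b"a"), h1(b"b"), h1(b"c"), h1(b"d"));
    let n_ab = h2(a, b);
    let n_cd = h2(c, d);
    let old_root = h2(n_ab, n_cd);

    // To go from old_root to the new root we must know d and n_ab (the nodes
    // next to c's path), even though only c's value changes.
    let new_root = root_after_write(b"c2", &[(d, false), (n_ab, true)]);
    assert_ne!(new_root, old_root);
    assert_eq!(new_root, h2(n_ab, h2(h1(b"c2"), d)));
}
```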

Contributor: I see, ok, thanks for clarifying your intent in mathematical form :)

I think my question is a bit more detailed than that.

Suppose we replace a node in the trie. Do we actually need the node that we're replacing? The parent of that node already holds the hashes of the sibling nodes that we need to reconstruct the parent. For example, we have A -> B -> C -> D and we're deleting the old D: to reconstruct A' -> B' -> C' -> D', we only need A, B, C, and the new D'. The old D is not needed. But the logic in this PR seems to require that D be known, and that's the part I don't understand.

Member Author: That makes sense. Then my argument comes down to:

  • That's what the current disk logic does. Every time we descend to a node, we pull it out via move_node_to_mutable or delete_value. By default I would avoid adding "if this is the last node, don't do it" logic in a couple of places, plus a corner case for when this node is a restructured branch.
  • The rule "we take all nodes on the path" is simpler to explain than "we take all nodes on reads, plus all nodes except the last one on writes".
  • The savings from not including the previous value shouldn't be high, because modifying a value without reading it is a rare case: action_deploy_contract reads the contract before redeploying it, etc. We could still store only the ValueRefs of previous values, but I'm not 100% confident I'd get that right the first time :D

Contributor: If we are recording node D in A -> B -> C -> D for on-disk tries, let's keep things consistent and do the same for memtries, even if it's not the most efficient.

// not recorded before.
if let Some(recorder) = &self.recorder {
for (node_hash, serialized_node) in trie_accesses.nodes {
recorder.borrow_mut().record(&node_hash, serialized_node);
}
for value_hash in trie_accesses.values {
let value = self.storage.retrieve_raw_bytes(&value_hash)?;
recorder.borrow_mut().record(&value_hash, value);
}
}
Ok(trie_changes)
Contributor: It's sad that the changes are recorded retroactively.
For the state witness size limit it would be best to record them as they happen; that would make it possible to stop executing a receipt once it generates more than X MB of PartialState. With retroactive recording we can only measure how much PartialState was generated after the update is applied, which AFAIU happens after applying the whole chunk :/

Actually, doesn't this break #10703? The soft size limit looks at the size of the recorder state after applying each receipt and stops executing new ones when that size gets above the limit. It depends on online recording; it wouldn't work with retroactive recording.

Contributor: Would it be possible to somehow put the recording logic in TrieUpdate::set?

Contributor: And I guess it works the same way with normal trie writes. That sucks, it really throws a wrench into the witness size limit. This solution makes sense for now, but I think the whole logic of trie updating will need a refactor for the size limit :/

Contributor: Okay, actually we don't need to record writes for the state witness, only the reads need to be recorded :)

Contributor: As discussed in the meeting today, it turns out we do want to record the nodes for updates/deletions :(
I believe this update function is only called in trie_update finalize for all practical purposes?

We would need to revisit this and find a better solution for recording on the fly, like in the get function, instead of in trie_update finalize. This is required for both the soft and hard state witness size limits.
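For reference, a minimal sketch of what such on-the-fly recording could look like (the SizeLimitedRecorder type and its methods are hypothetical, not the nearcore recorder API): the recorder is fed during the update itself, so a soft size limit can be checked between receipts instead of only after the whole chunk is applied.

```rust
use std::collections::HashMap;

/// Hypothetical recorder with a soft size limit.
struct SizeLimitedRecorder {
    recorded: HashMap<u64, Vec<u8>>, // node hash -> serialized node
    recorded_bytes: usize,
    soft_limit_bytes: usize,
}

impl SizeLimitedRecorder {
    /// Called on every node access while the update is being applied,
    /// not retroactively after finalize.
    fn record(&mut self, hash: u64, serialized: Vec<u8>) {
        if !self.recorded.contains_key(&hash) {
            self.recorded_bytes += serialized.len();
            self.recorded.insert(hash, serialized);
        }
    }

    /// Checked between receipts: once the recorded PartialState exceeds the
    /// soft limit, stop scheduling new receipts for this chunk.
    fn over_soft_limit(&self) -> bool {
        self.recorded_bytes > self.soft_limit_bytes
    }
}

fn main() {
    let mut recorder = SizeLimitedRecorder {
        recorded: HashMap::new(),
        recorded_bytes: 0,
        soft_limit_bytes: 64,
    };
    for receipt in 0u64..10 {
        // Pretend each receipt touches one node that serializes to 16 bytes.
        recorder.record(receipt, vec![0u8; 16]);
        if recorder.over_soft_limit() {
            println!("soft limit hit after receipt {receipt}; stop executing new receipts");
            break;
        }
    }
}
```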

Contributor: Currently (other than trie restructuring), I guess we just rely on the fact that the runtime first calls a get before an update/delete for gas cost estimation.

}
None => {
let mut memory = NodesStorage::new();
32 changes: 28 additions & 4 deletions core/store/src/trie/trie_recording.rs
@@ -44,12 +44,13 @@ mod trie_recording_tests {
use crate::trie::mem::metrics::MEM_TRIE_NUM_LOOKUPS;
use crate::trie::TrieNodesCount;
use crate::{DBCol, Store, Trie};
use borsh::BorshDeserialize;
use near_primitives::hash::{hash, CryptoHash};
use near_primitives::shard_layout::{get_block_shard_uid, get_block_shard_uid_rev, ShardUId};
use near_primitives::shard_layout::{get_block_shard_uid, ShardUId};
use near_primitives::state::ValueRef;
use near_primitives::types::chunk_extra::ChunkExtra;
use near_primitives::types::StateRoot;
use rand::{thread_rng, Rng};
use rand::{random, thread_rng, Rng};
use std::collections::{HashMap, HashSet};
use std::num::NonZeroU32;

@@ -66,6 +67,8 @@
/// The keys that we should be using to call get_optimized_ref() on the
/// trie with.
keys_to_get_ref: Vec<Vec<u8>>,
/// The keys to be updated after trie reads.
updates: Vec<(Vec<u8>, Option<Vec<u8>>)>,
state_root: StateRoot,
}

@@ -121,13 +124,26 @@
}
key
})
.partition::<Vec<_>, _>(|_| thread_rng().gen());
.partition::<Vec<_>, _>(|_| random());
let updates = trie_changes
.iter()
.map(|(key, _)| {
let value = if thread_rng().gen_bool(0.5) {
Some(vec![thread_rng().gen_range(0..10) as u8])
} else {
None
};
(key.clone(), value)
})
.filter(|_| random())
.collect::<Vec<_>>();
PreparedTrie {
store: tries_for_building.get_store(),
shard_uid,
data_in_trie,
keys_to_get,
keys_to_get_ref,
updates,
state_root,
}
}
@@ -146,7 +162,7 @@
for result in store.iter_raw_bytes(DBCol::State) {
let (key, value) = result.unwrap();
let (_, refcount) = decode_value_with_rc(&value);
let (key_hash, _) = get_block_shard_uid_rev(&key).unwrap();
let key_hash: CryptoHash = CryptoHash::try_from_slice(&key[8..]).unwrap();
if !key_hashes_to_keep.contains(&key_hash) {
update.decrement_refcount_by(
DBCol::State,
@@ -174,6 +190,7 @@
data_in_trie,
keys_to_get,
keys_to_get_ref,
updates,
state_root,
} = prepare_trie(use_missing_keys);
let tries = if use_in_memory_tries {
@@ -206,6 +223,7 @@
}
let baseline_trie_nodes_count = trie.get_trie_nodes_count();
println!("Baseline trie nodes count: {:?}", baseline_trie_nodes_count);
trie.update(updates.iter().cloned()).unwrap();

// Now let's do this again while recording, and make sure that the counters
// we get are exactly the same.
@@ -223,6 +241,7 @@
);
}
assert_eq!(trie.get_trie_nodes_count(), baseline_trie_nodes_count);
trie.update(updates.iter().cloned()).unwrap();

// Now, let's check that when doing the same lookups with the captured partial storage,
// we still get the same counters.
@@ -246,6 +265,7 @@
);
}
assert_eq!(trie.get_trie_nodes_count(), baseline_trie_nodes_count);
trie.update(updates.iter().cloned()).unwrap();

if use_in_memory_tries {
// sanity check that we did indeed use in-memory tries.
@@ -310,6 +330,7 @@
data_in_trie,
keys_to_get,
keys_to_get_ref,
updates,
state_root,
} = prepare_trie(use_missing_keys);
let tries = if use_in_memory_tries {
@@ -364,6 +385,7 @@
}
let baseline_trie_nodes_count = trie.get_trie_nodes_count();
println!("Baseline trie nodes count: {:?}", baseline_trie_nodes_count);
trie.update(updates.iter().cloned()).unwrap();

// Let's do this again, but this time recording reads. We'll make sure
// the counters are exactly the same even when we're recording.
@@ -388,6 +410,7 @@
);
}
assert_eq!(trie.get_trie_nodes_count(), baseline_trie_nodes_count);
trie.update(updates.iter().cloned()).unwrap();

// Now, let's check that when doing the same lookups with the captured partial storage,
// we still get the same counters.
@@ -411,6 +434,7 @@
);
}
assert_eq!(trie.get_trie_nodes_count(), baseline_trie_nodes_count);
trie.update(updates.iter().cloned()).unwrap();

if use_in_memory_tries {
// sanity check that we did indeed use in-memory tries.
8 changes: 5 additions & 3 deletions core/store/src/trie/trie_tests.rs
@@ -419,10 +419,12 @@ mod trie_storage_tests {
assert_eq!(count_delta.mem_reads, 1);
}

// TODO(#10769): Make this test pass.
// Checks that for keys only touched on writes recorded storage for
// memtrie matches recorded storage for disk.
// Required because recording on read and write happen on different code
// paths for memtrie.
#[test]
#[should_panic]
fn test_memtrie_discrepancy() {
fn test_memtrie_recorded_writes() {
init_test_logger();
let tries = TestTriesBuilder::new().build();
let shard_uid = ShardUId::single_shard();