From ed25c03d1f6c703dfa63170721b159d27681679e Mon Sep 17 00:00:00 2001
From: brooks <brooks@anza.xyz>
Date: Tue, 10 Dec 2024 14:22:33 -0500
Subject: [PATCH 1/3] Reclaims more old accounts in `clean`

---
 accounts-db/src/accounts_db.rs | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs
index 26923d5e05a224..3e246f08820054 100644
--- a/accounts-db/src/accounts_db.rs
+++ b/accounts-db/src/accounts_db.rs
@@ -2862,6 +2862,19 @@ impl AccountsDb {
                                         } else {
                                             found_not_zero += 1;
                                         }
+
+                                        // If this candidate has multiple rooted slot list entries,
+                                        // we should reclaim the older ones.
+                                        if slot_list.len() > 1
+                                            && *slot
+                                                <= max_clean_root_inclusive.unwrap_or(Slot::MAX)
+                                        {
+                                            should_collect_reclaims = true;
+                                            purges_old_accounts_local += 1;
+                                            useless = false;
+                                        }
+                                        // Note, this next if-block is only kept to maintain the
+                                        // `uncleaned_roots_slot_list_1` stat.
                                         if uncleaned_roots.contains(slot) {
                                             // Assertion enforced by `accounts_index.get()`, the latest slot
                                             // will not be greater than the given `max_clean_root`
@@ -2870,12 +2883,7 @@ impl AccountsDb {
                                             {
                                                 assert!(slot <= &max_clean_root_inclusive);
                                             }
-                                            if slot_list.len() > 1 {
-                                                // no need to reclaim old accounts if there is only 1 slot in the slot list
-                                                should_collect_reclaims = true;
-                                                purges_old_accounts_local += 1;
-                                                useless = false;
-                                            } else {
+                                            if slot_list.len() == 1 {
                                                 self.clean_accounts_stats
                                                     .uncleaned_roots_slot_list_1
                                                     .fetch_add(1, Ordering::Relaxed);

From eb1b7a026a362161b4bc20481b4acca6495e5cf3 Mon Sep 17 00:00:00 2001
From: brooks <brooks@anza.xyz>
Date: Wed, 11 Dec 2024 10:53:51 -0500
Subject: [PATCH 2/3] adds test

---
 accounts-db/src/accounts_db/tests.rs | 57 +++++++++++++++++++++++++++-
 accounts-db/src/accounts_index.rs    | 10 +++++
 2 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/accounts-db/src/accounts_db/tests.rs b/accounts-db/src/accounts_db/tests.rs
index e1eb4b85144987..4c79a7c2ad42db 100644
--- a/accounts-db/src/accounts_db/tests.rs
+++ b/accounts-db/src/accounts_db/tests.rs
@@ -23,7 +23,7 @@ use {
     },
     std::{
         hash::DefaultHasher,
-        iter::FromIterator,
+        iter::{self, FromIterator},
         str::FromStr,
         sync::{atomic::AtomicBool, RwLock},
         thread::{self, Builder, JoinHandle},
@@ -8146,3 +8146,58 @@ fn compute_merkle_root(hashes: impl IntoIterator<Item = Hash>) -> Hash {
     let hashes = hashes.into_iter().collect();
     AccountsHasher::compute_merkle_root_recurse(hashes, MERKLE_FANOUT)
 }
+
+/// Test that `clean` reclaims old accounts when cleaning old storages
+///
+/// When `clean` constructs candidates from old storages, pubkeys in these storages may have other
+/// newer versions of the accounts in other newer storages *not* explicitly marked to be visited by
+/// `clean`.  In this case, `clean` should still reclaim the old versions of these accounts.
+#[test]
+fn test_clean_old_storages_with_reclaims() {
+    let accounts_db = AccountsDb::new_single_for_tests();
+    let pubkey = Pubkey::new_unique();
+    let old_slot = 11;
+    let new_slot = 22;
+    let slots = [old_slot, new_slot];
+    for &slot in &slots {
+        let account = AccountSharedData::new(slot, 0, &Pubkey::new_unique());
+        // store `pubkey` into multiple slots, and also store another unique pubkey
+        // to prevent the whole storage from being marked as dead by `clean`.
+        accounts_db.store_for_tests(
+            slot,
+            &[(&pubkey, &account), (&Pubkey::new_unique(), &account)],
+        );
+        accounts_db.calculate_accounts_delta_hash(slot);
+        accounts_db.add_root_and_flush_write_cache(slot);
+    }
+
+    // ensure the slot list for `pubkey` has both the old and new slots
+    let slot_list = accounts_db
+        .accounts_index
+        .get_bin(&pubkey)
+        .slot_list_mut(&pubkey, |slot_list| slot_list.clone())
+        .unwrap();
+    assert_eq!(slot_list.len(), slots.len());
+    assert!(slot_list.iter().map(|(slot, _)| slot).eq(slots.iter()));
+
+    // for this test, `new_slot` must not be in the uncleaned_roots list
+    accounts_db.accounts_index.remove_uncleaned_root(new_slot);
+    assert!(accounts_db.accounts_index.is_uncleaned_root(old_slot));
+    assert!(!accounts_db.accounts_index.is_uncleaned_root(new_slot));
+
+    // `clean` should now reclaim the account in `old_slot`, even though `new_slot` is not
+    // explicitly being cleaned
+    accounts_db.clean_accounts_for_tests();
+
+    // ensure we've reclaimed the account `old_slot`
+    let slot_list = accounts_db
+        .accounts_index
+        .get_bin(&pubkey)
+        .slot_list_mut(&pubkey, |slot_list| slot_list.clone())
+        .unwrap();
+    assert_eq!(slot_list.len(), 1);
+    assert!(slot_list
+        .iter()
+        .map(|(slot, _)| slot)
+        .eq(iter::once(&new_slot)));
+}
diff --git a/accounts-db/src/accounts_index.rs b/accounts-db/src/accounts_index.rs
index f90168aeeccd59..2aae2d80a21553 100644
--- a/accounts-db/src/accounts_index.rs
+++ b/accounts-db/src/accounts_index.rs
@@ -2021,6 +2021,16 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> AccountsIndex<T, U> {
         w_roots_tracker.uncleaned_roots.extend(roots);
     }
 
+    /// Removes `root` from `uncleaned_roots` and returns whether it was previously present
+    #[cfg(feature = "dev-context-only-utils")]
+    pub fn remove_uncleaned_root(&self, root: Slot) -> bool {
+        self.roots_tracker
+            .write()
+            .unwrap()
+            .uncleaned_roots
+            .remove(&root)
+    }
+
     pub fn max_root_inclusive(&self) -> Slot {
         self.roots_tracker
             .read()

From a3438df21684a6727c65dbb992ccd368ca3d642d Mon Sep 17 00:00:00 2001
From: brooks <brooks@anza.xyz>
Date: Wed, 11 Dec 2024 16:38:27 -0500
Subject: [PATCH 3/3] add second test

---
 accounts-db/src/accounts_db/tests.rs | 65 +++++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 7 deletions(-)

diff --git a/accounts-db/src/accounts_db/tests.rs b/accounts-db/src/accounts_db/tests.rs
index 4c79a7c2ad42db..648522b06762a9 100644
--- a/accounts-db/src/accounts_db/tests.rs
+++ b/accounts-db/src/accounts_db/tests.rs
@@ -8153,7 +8153,7 @@ fn compute_merkle_root(hashes: impl IntoIterator<Item = Hash>) -> Hash {
 /// newer versions of the accounts in other newer storages *not* explicitly marked to be visited by
 /// `clean`.  In this case, `clean` should still reclaim the old versions of these accounts.
 #[test]
-fn test_clean_old_storages_with_reclaims() {
+fn test_clean_old_storages_with_reclaims_rooted() {
     let accounts_db = AccountsDb::new_single_for_tests();
     let pubkey = Pubkey::new_unique();
     let old_slot = 11;
@@ -8171,6 +8171,11 @@ fn test_clean_old_storages_with_reclaims() {
         accounts_db.add_root_and_flush_write_cache(slot);
     }
 
+    // for this test, `new_slot` must not be in the uncleaned_roots list
+    accounts_db.accounts_index.remove_uncleaned_root(new_slot);
+    assert!(accounts_db.accounts_index.is_uncleaned_root(old_slot));
+    assert!(!accounts_db.accounts_index.is_uncleaned_root(new_slot));
+
     // ensure the slot list for `pubkey` has both the old and new slots
     let slot_list = accounts_db
         .accounts_index
@@ -8180,16 +8185,11 @@ fn test_clean_old_storages_with_reclaims() {
     assert_eq!(slot_list.len(), slots.len());
     assert!(slot_list.iter().map(|(slot, _)| slot).eq(slots.iter()));
 
-    // for this test, `new_slot` must not be in the uncleaned_roots list
-    accounts_db.accounts_index.remove_uncleaned_root(new_slot);
-    assert!(accounts_db.accounts_index.is_uncleaned_root(old_slot));
-    assert!(!accounts_db.accounts_index.is_uncleaned_root(new_slot));
-
     // `clean` should now reclaim the account in `old_slot`, even though `new_slot` is not
     // explicitly being cleaned
     accounts_db.clean_accounts_for_tests();
 
-    // ensure we've reclaimed the account `old_slot`
+    // ensure we've reclaimed the account in `old_slot`
     let slot_list = accounts_db
         .accounts_index
         .get_bin(&pubkey)
@@ -8201,3 +8201,54 @@ fn test_clean_old_storages_with_reclaims() {
         .map(|(slot, _)| slot)
         .eq(iter::once(&new_slot)));
 }
+
+/// Test that `clean` respects rooted vs unrooted slots w.r.t. reclaims
+///
+/// When an account is in multiple slots, and the latest is unrooted, `clean` should *not* reclaim
+/// all the rooted versions.
+#[test]
+fn test_clean_old_storages_with_reclaims_unrooted() {
+    let accounts_db = AccountsDb::new_single_for_tests();
+    let pubkey = Pubkey::new_unique();
+    let old_slot = 11;
+    let new_slot = 22;
+    let slots = [old_slot, new_slot];
+    for &slot in &slots {
+        let account = AccountSharedData::new(slot, 0, &Pubkey::new_unique());
+        // store `pubkey` into multiple slots, and also store another unique pubkey
+        // to prevent the whole storage from being marked as dead by `clean`.
+        accounts_db.store_for_tests(
+            slot,
+            &[(&pubkey, &account), (&Pubkey::new_unique(), &account)],
+        );
+        accounts_db.calculate_accounts_delta_hash(slot);
+    }
+    // do not root `new_slot`!
+    accounts_db.add_root_and_flush_write_cache(old_slot);
+
+    // for this test, `new_slot` must not be a root
+    assert!(accounts_db.accounts_index.is_uncleaned_root(old_slot));
+    assert!(!accounts_db.accounts_index.is_uncleaned_root(new_slot));
+    assert!(!accounts_db.accounts_index.is_alive_root(new_slot));
+
+    // ensure the slot list for `pubkey` has both the old and new slots
+    let slot_list = accounts_db
+        .accounts_index
+        .get_bin(&pubkey)
+        .slot_list_mut(&pubkey, |slot_list| slot_list.clone())
+        .unwrap();
+    assert_eq!(slot_list.len(), slots.len());
+    assert!(slot_list.iter().map(|(slot, _)| slot).eq(slots.iter()));
+
+    // `clean` should *not* reclaim the account in `old_slot` because `new_slot` is not a root
+    accounts_db.clean_accounts_for_tests();
+
+    // ensure we have NOT reclaimed the account in `old_slot`
+    let slot_list = accounts_db
+        .accounts_index
+        .get_bin(&pubkey)
+        .slot_list_mut(&pubkey, |slot_list| slot_list.clone())
+        .unwrap();
+    assert_eq!(slot_list.len(), slots.len());
+    assert!(slot_list.iter().map(|(slot, _)| slot).eq(slots.iter()));
+}