Skip to content

Commit

Permalink
always pack a few newest ancient slots (#1730)
Browse files Browse the repository at this point in the history
* always pack a few newest ancient slots

* pr feedback

* remove extra ()

* adds high slot tests

---------

Co-authored-by: brooks <brooks@anza.xyz>
  • Loading branch information
jeffwashington and brooksprumo authored Jun 18, 2024
1 parent 2dc9508 commit d2b3476
Showing 1 changed file with 145 additions and 12 deletions.
157 changes: 145 additions & 12 deletions accounts-db/src/ancient_append_vecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ use {
},
};

/// this many # of highest slot values should be treated as desirable to pack.
/// This gives us high slots to move packed accounts into.
const HIGH_SLOT_OFFSET: u64 = 100;

/// ancient packing algorithm tuning per pass
#[derive(Debug)]
struct PackedAncientStorageTuning {
Expand Down Expand Up @@ -57,6 +61,9 @@ struct SlotInfo {
alive_bytes: u64,
/// true if this should be shrunk due to ratio
should_shrink: bool,
/// this slot is a high slot #
/// It is important to include some high slot #s so that we have new slots to try each time pack runs.
is_high_slot: bool,
}

/// info for all storages in ancient slots
Expand All @@ -83,6 +90,7 @@ impl AncientSlotInfos {
storage: Arc<AccountStorageEntry>,
can_randomly_shrink: bool,
ideal_size: NonZeroU64,
is_high_slot: bool,
) -> bool {
let mut was_randomly_shrunk = false;
let alive_bytes = storage.alive_bytes() as u64;
Expand Down Expand Up @@ -122,6 +130,7 @@ impl AncientSlotInfos {
storage,
alive_bytes,
should_shrink,
is_high_slot,
});
self.total_alive_bytes += alive_bytes;
}
Expand Down Expand Up @@ -198,14 +207,18 @@ impl AncientSlotInfos {
let storages_remaining = total_storages - i - 1;

// if the remaining uncombined storages and the # of resulting
// combined ancient storages is less than the threshold, then
// combined ancient storages are less than the threshold, then
// we've gone too far, so get rid of this entry and all after it.
// Every storage after this one is larger.
// Every storage after this one is larger than the ones we've chosen.
// if we ever get to more than `max_resulting_storages` required ancient storages, that is enough to stop for now.
// It will take a while to create that many. This should be a limit that only affects
// extreme testing environments.
if storages_remaining + ancient_storages_required < low_threshold
|| ancient_storages_required as u64 > u64::from(tuning.max_resulting_storages)
// It will take a lot of time for the pack algorithm to create that many, and that is bad for system performance.
// This should be a limit that only affects extreme testing environments.
// We do not stop including entries until we have dealt with all the high slot #s. This allows the algorithm to continue
// to make progress each time it is called. There are exceptions that can cause the pack to fail, such as accounts with multiple
// refs.
if !info.is_high_slot
&& (storages_remaining + ancient_storages_required < low_threshold
|| ancient_storages_required as u64 > u64::from(tuning.max_resulting_storages))
{
self.all_infos.truncate(i);
break;
Expand All @@ -229,10 +242,15 @@ impl AncientSlotInfos {
return;
}

// sort by 'should_shrink' then smallest capacity to largest
// sort by:
// 1. `high_slot`: we want to include new, high slots each time so that we try new slots
// each time alg runs and have several high target slots for packed storages.
// 2. 'should_shrink' so we make progress on shrinking ancient storages
// 3. smallest capacity to largest so that we remove the most slots possible
self.all_infos.sort_unstable_by(|l, r| {
r.should_shrink
.cmp(&l.should_shrink)
r.is_high_slot
.cmp(&l.is_high_slot)
.then_with(|| r.should_shrink.cmp(&l.should_shrink))
.then_with(|| l.capacity.cmp(&r.capacity))
});

Expand Down Expand Up @@ -498,10 +516,20 @@ impl AccountsDb {
..AncientSlotInfos::default()
};
let mut randoms = 0;
let max_slot = slots.iter().max().cloned().unwrap_or_default();
// heuristic to include some # of newly eligible ancient slots so that the pack algorithm always makes progress
let high_slot_boundary = max_slot.saturating_sub(HIGH_SLOT_OFFSET);
let is_high_slot = |slot| slot >= high_slot_boundary;

for slot in &slots {
if let Some(storage) = self.storage.get_slot_storage_entry(*slot) {
if infos.add(*slot, storage, can_randomly_shrink, ideal_size) {
if infos.add(
*slot,
storage,
can_randomly_shrink,
ideal_size,
is_high_slot(*slot),
) {
randoms += 1;
}
}
Expand Down Expand Up @@ -1081,12 +1109,13 @@ pub mod tests {
append_vec::{aligned_stored_size, AppendVec, AppendVecStoredAccountMeta},
storable_accounts::{tests::build_accounts_from_storage, StorableAccountsBySlot},
},
rand::seq::SliceRandom as _,
solana_sdk::{
account::{AccountSharedData, ReadableAccount, WritableAccount},
hash::Hash,
pubkey::Pubkey,
},
std::ops::Range,
std::{collections::HashSet, ops::Range},
strum::IntoEnumIterator,
strum_macros::EnumIter,
};
Expand All @@ -1105,6 +1134,7 @@ pub mod tests {
let original_stores = (0..slots)
.filter_map(|slot| db.storage.get_slot_storage_entry((slot as Slot) + slot1))
.collect::<Vec<_>>();
let is_high_slot = false;
let slot_infos = original_stores
.iter()
.map(|storage| SlotInfo {
Expand All @@ -1113,6 +1143,7 @@ pub mod tests {
capacity: 0,
alive_bytes: 0,
should_shrink: false,
is_high_slot,
})
.collect();
(
Expand Down Expand Up @@ -2379,6 +2410,7 @@ pub mod tests {
let mut infos = AncientSlotInfos::default();
let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
let alive_bytes_expected = storage.alive_bytes();
let high_slot = false;
match method {
TestCollectInfo::Add => {
// test lower level 'add'
Expand All @@ -2387,6 +2419,7 @@ pub mod tests {
Arc::clone(&storage),
can_randomly_shrink,
NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(),
high_slot,
);
}
TestCollectInfo::CalcAncientSlotInfo => {
Expand Down Expand Up @@ -2445,12 +2478,14 @@ pub mod tests {
let (db, slot1) = create_db_with_storages_and_index(alive, slots, None);
let mut infos = AncientSlotInfos::default();
let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
let high_slot = false;
if call_add {
infos.add(
slot1,
Arc::clone(&storage),
can_randomly_shrink,
NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(),
high_slot,
);
} else {
infos = db.calc_ancient_slot_info(
Expand Down Expand Up @@ -2628,6 +2663,7 @@ pub mod tests {
capacity: 1,
alive_bytes: 1,
should_shrink: false,
is_high_slot: false,
})
.collect(),
shrink_indexes: (0..count).collect(),
Expand Down Expand Up @@ -2671,7 +2707,7 @@ pub mod tests {
}

#[test]
fn test_filter_by_smaller_capacity_sort() {
fn test_filter_by_smallest_capacity_sort() {
// max is 6
// 7 storages
// storage[last] is big enough to cause us to need another storage
Expand Down Expand Up @@ -2729,6 +2765,101 @@ pub mod tests {
}
}

/// Test that we always include the high slots when filtering which ancient infos to pack
///
/// If we have *more* high slots than max resulting storages set in the tuning parameters,
/// we should still have all the high slots after calling `filter_by_smallest_capacity().
#[test]
fn test_filter_by_smallest_capacity_high_slot_more() {
let tuning = default_tuning();

// Ensure we have more storages with high slots than the 'max resulting storages'.
let num_high_slots = tuning.max_resulting_storages.get() * 2;
let num_ancient_storages = num_high_slots * 3;
let mut infos = create_test_infos(num_ancient_storages as usize);
infos
.all_infos
.sort_unstable_by_key(|slot_info| slot_info.slot);
infos
.all_infos
.iter_mut()
.rev()
.take(num_high_slots as usize)
.for_each(|slot_info| {
slot_info.is_high_slot = true;
});
let slots_expected: Vec<_> = infos
.all_infos
.iter()
.filter_map(|slot_info| slot_info.is_high_slot.then_some(slot_info.slot))
.collect();

// shuffle the infos so they actually need to be sorted
infos.all_infos.shuffle(&mut thread_rng());
infos.filter_by_smallest_capacity(&tuning);

infos
.all_infos
.sort_unstable_by_key(|slot_info| slot_info.slot);
let slots_actual: Vec<_> = infos
.all_infos
.iter()
.map(|slot_info| slot_info.slot)
.collect();
assert_eq!(infos.all_infos.len() as u64, num_high_slots);
assert_eq!(slots_actual, slots_expected);
}

/// Test that we always include the high slots when filtering which ancient infos to pack
///
/// If we have *less* high slots than max resulting storages set in the tuning parameters,
/// we should still have all the high slots after calling `filter_by_smallest_capacity().
#[test]
fn test_filter_by_smallest_capacity_high_slot_less() {
let tuning = default_tuning();

// Ensure we have less storages with high slots than the 'max resulting storages'.
let num_high_slots = tuning.max_resulting_storages.get() / 2;
let num_ancient_storages = num_high_slots * 5;
let mut infos = create_test_infos(num_ancient_storages as usize);
infos
.all_infos
.sort_unstable_by_key(|slot_info| slot_info.slot);
infos
.all_infos
.iter_mut()
.rev()
.take(num_high_slots as usize)
.for_each(|slot_info| {
slot_info.is_high_slot = true;
});
let high_slots: Vec<_> = infos
.all_infos
.iter()
.filter_map(|slot_info| slot_info.is_high_slot.then_some(slot_info.slot))
.collect();

// shuffle the infos so they actually need to be sorted
infos.all_infos.shuffle(&mut thread_rng());
infos.filter_by_smallest_capacity(&tuning);

infos
.all_infos
.sort_unstable_by_key(|slot_info| slot_info.slot);
let slots_actual: HashSet<_> = infos
.all_infos
.iter()
.map(|slot_info| slot_info.slot)
.collect();
assert_eq!(
infos.all_infos.len() as u64,
tuning.max_resulting_storages.get() - 1,
);
assert!(high_slots
.iter()
.all(|high_slot| slots_actual.contains(high_slot)));
}

fn test(filter: bool, infos: &mut AncientSlotInfos, tuning: &PackedAncientStorageTuning) {
if filter {
infos.filter_by_smallest_capacity(tuning);
Expand Down Expand Up @@ -3212,13 +3343,15 @@ pub mod tests {
capacity: info1_capacity,
alive_bytes: 0,
should_shrink: false,
is_high_slot: false,
};
let info2 = SlotInfo {
storage: storage.clone(),
slot,
capacity: 2,
alive_bytes: 1,
should_shrink: false,
is_high_slot: false,
};
let mut infos = AncientSlotInfos {
all_infos: vec![info1, info2],
Expand Down

0 comments on commit d2b3476

Please sign in to comment.