Auto merge of rust-lang#138405 - Zoxc:vec-cache-tweaks, r=<try>

bors · bors · commit d06bb14bb9c4 · 2025-03-12T18:57:33.000Z
Tweak `VecCache` to improve performance

This has some tweaks to `VecCache` to improve performance.

- It saves a `compare_exchange` in `complete` using the new `put_unique` function.
- It removes bound checks on entries. These are instead checked in the `slot_index_exhaustive` test.
- `initialize_bucket` is outlined and tuned for that.

cc `@Mark-Simulacrum`
diff --git a/compiler/rustc_data_structures/src/vec_cache.rs b/compiler/rustc_data_structures/src/vec_cache.rs
@@ -29,8 +29,6 @@ struct Slot<V> {
 struct SlotIndex {
     // the index of the bucket in VecCache (0 to 20)
     bucket_idx: usize,
-    // number of entries in that bucket
-    entries: usize,
     // the index of the slot within the bucket
     index_in_bucket: usize,
 }
@@ -44,7 +42,7 @@ const ENTRIES_BY_BUCKET: [usize; 21] = {
     let mut key = 0;
     loop {
         let si = SlotIndex::from_index(key);
-        entries[si.bucket_idx] = si.entries;
+        entries[si.bucket_idx] = si.entries();
         if key == 0 {
             key = 1;
         } else if key == (1 << 31) {
@@ -56,7 +54,14 @@ const ENTRIES_BY_BUCKET: [usize; 21] = {
     entries
 };
 
+const BUCKETS: usize = 21;
+
 impl SlotIndex {
+    /// The total possible number of entries in the bucket
+    const fn entries(&self) -> usize {
+        if self.bucket_idx == 0 { 1 << 12 } else { 1 << (self.bucket_idx + 11) }
+    }
+
     // This unpacks a flat u32 index into identifying which bucket it belongs to and the offset
     // within that bucket. As noted in the VecCache docs, buckets double in size with each index.
     // Typically that would mean 31 buckets (2^0 + 2^1 ... + 2^31 = u32::MAX - 1), but to reduce
@@ -72,18 +77,16 @@ impl SlotIndex {
             Some(x) => x as usize,
             None => 0,
         };
-        let entries;
         let running_sum;
         if bucket <= 11 {
-            entries = 1 << 12;
             running_sum = 0;
             bucket = 0;
         } else {
-            entries = 1 << bucket;
+            let entries = 1 << bucket;
             running_sum = entries;
             bucket = bucket - 11;
         }
-        SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
+        SlotIndex { bucket_idx: bucket, index_in_bucket: idx as usize - running_sum }
     }
 
     // SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
@@ -98,7 +101,7 @@ impl SlotIndex {
         if ptr.is_null() {
             return None;
         }
-        assert!(self.index_in_bucket < self.entries);
+        debug_assert!(self.index_in_bucket < self.entries());
         // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
         // must be inbounds.
         let slot = unsafe { ptr.add(self.index_in_bucket) };
@@ -126,11 +129,12 @@ impl SlotIndex {
 
     fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
         let ptr = bucket.load(Ordering::Acquire);
-        if ptr.is_null() { self.initialize_bucket(bucket) } else { ptr }
+        if ptr.is_null() { Self::initialize_bucket(bucket, self.bucket_idx) } else { ptr }
     }
 
     #[cold]
-    fn initialize_bucket<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
+    #[inline(never)]
+    fn initialize_bucket<V>(bucket: &AtomicPtr<Slot<V>>, bucket_idx: usize) -> *mut Slot<V> {
         static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
 
         // If we are initializing the bucket, then acquire a global lock.
@@ -144,8 +148,8 @@ impl SlotIndex {
         // OK, now under the allocator lock, if we're still null then it's definitely us that will
         // initialize this bucket.
         if ptr.is_null() {
-            let bucket_layout =
-                std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
+            let bucket_len = SlotIndex { bucket_idx, index_in_bucket: 0 }.entries();
+            let bucket_layout = std::alloc::Layout::array::<Slot<V>>(bucket_len).unwrap();
             // This is more of a sanity check -- this code is very cold, so it's safe to pay a
             // little extra cost here.
             assert!(bucket_layout.size() > 0);
@@ -171,7 +175,7 @@ impl SlotIndex {
         let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
         let ptr = self.bucket_ptr(bucket);
 
-        assert!(self.index_in_bucket < self.entries);
+        debug_assert!(self.index_in_bucket < self.entries());
         // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
         // must be inbounds.
         let slot = unsafe { ptr.add(self.index_in_bucket) };
@@ -204,6 +208,31 @@ impl SlotIndex {
             Err(_) => false,
         }
     }
+
+    /// Inserts into the map, given that the slot is unique, so it won't race with other threads.
+    #[inline]
+    unsafe fn put_unique<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) {
+        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 21, i.e.,
+        // in-bounds of buckets.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let ptr = self.bucket_ptr(bucket);
+
+        debug_assert!(self.index_in_bucket < self.entries());
+        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
+        // must be inbounds.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        // SAFETY: We known our slot is unique as a precondition of this function, so this can't race.
+        unsafe {
+            (&raw mut (*slot).value).write(value);
+        }
+
+        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
+        // AtomicU32 access.
+        let index_and_lock = unsafe { &(*slot).index_and_lock };
+
+        index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
+    }
 }
 
 /// In-memory cache for queries whose keys are densely-numbered IDs
@@ -229,11 +258,11 @@ pub struct VecCache<K: Idx, V, I> {
     // Bucket 19: 1073741824
     // Bucket 20: 2147483648
     // The total number of entries if all buckets are initialized is u32::MAX-1.
-    buckets: [AtomicPtr<Slot<V>>; 21],
+    buckets: [AtomicPtr<Slot<V>>; BUCKETS],
 
     // In the compiler's current usage these are only *read* during incremental and self-profiling.
     // They are an optimization over iterating the full buckets array.
-    present: [AtomicPtr<Slot<()>>; 21],
+    present: [AtomicPtr<Slot<()>>; BUCKETS],
     len: AtomicUsize,
 
     key: PhantomData<(K, I)>,
@@ -307,9 +336,9 @@ where
         let slot_idx = SlotIndex::from_index(key);
         if slot_idx.put(&self.buckets, value, index.index() as u32) {
             let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
-            let slot = SlotIndex::from_index(present_idx as u32);
-            // We should always be uniquely putting due to `len` fetch_add returning unique values.
-            assert!(slot.put(&self.present, (), key));
+            let slot = SlotIndex::from_index(u32::try_from(present_idx).unwrap());
+            // SAFETY: We should always be uniquely putting due to `len` fetch_add returning unique values.
+            unsafe { slot.put_unique(&self.present, (), key) };
         }
     }
 
diff --git a/compiler/rustc_data_structures/src/vec_cache/tests.rs b/compiler/rustc_data_structures/src/vec_cache/tests.rs
@@ -68,6 +68,13 @@ fn slot_entries_table() {
     );
 }
 
+#[test]
+fn bucket_entries_matches() {
+    for i in 0..BUCKETS {
+        assert_eq!(SlotIndex { bucket_idx: i, index_in_bucket: 0 }.entries(), ENTRIES_BY_BUCKET[i]);
+    }
+}
+
 #[test]
 #[cfg(not(miri))]
 fn slot_index_exhaustive() {
@@ -78,6 +85,10 @@ fn slot_index_exhaustive() {
     let mut prev = None::<SlotIndex>;
     for idx in 0..=u32::MAX {
         let slot_idx = SlotIndex::from_index(idx);
+
+        assert!(slot_idx.index_in_bucket < slot_idx.entries());
+        assert!(slot_idx.bucket_idx < BUCKETS);
+
         if let Some(p) = prev {
             if p.bucket_idx == slot_idx.bucket_idx {
                 assert_eq!(p.index_in_bucket + 1, slot_idx.index_in_bucket);
@@ -90,8 +101,8 @@ fn slot_index_exhaustive() {
             assert_eq!(slot_idx.bucket_idx, 0);
         }
 
-        assert_eq!(buckets[slot_idx.bucket_idx], slot_idx.entries as u32);
-        assert_eq!(ENTRIES_BY_BUCKET[slot_idx.bucket_idx], slot_idx.entries, "{}", idx);
+        assert_eq!(buckets[slot_idx.bucket_idx], slot_idx.entries() as u32);
+        assert_eq!(ENTRIES_BY_BUCKET[slot_idx.bucket_idx], slot_idx.entries(), "{}", idx);
 
         prev = Some(slot_idx);
     }