Skip to content

Commit d06bb14

Browse files
committed
Auto merge of rust-lang#138405 - Zoxc:vec-cache-tweaks, r=<try>
Tweak `VecCache` to improve performance This has some tweaks to `VecCache` to improve performance. - It saves a `compare_exchange` in `complete` using the new `put_unique` function. - It removes bound checks on entries. These are instead checked in the `slot_index_exhaustive` test. - `initialize_bucket` is outlined and tuned for that. cc `@Mark-Simulacrum`
2 parents aaa2d47 + 00e7039 commit d06bb14

File tree

2 files changed

+60
-20
lines changed

2 files changed

+60
-20
lines changed

compiler/rustc_data_structures/src/vec_cache.rs

+47-18
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ struct Slot<V> {
2929
struct SlotIndex {
3030
// the index of the bucket in VecCache (0 to 20)
3131
bucket_idx: usize,
32-
// number of entries in that bucket
33-
entries: usize,
3432
// the index of the slot within the bucket
3533
index_in_bucket: usize,
3634
}
@@ -44,7 +42,7 @@ const ENTRIES_BY_BUCKET: [usize; 21] = {
4442
let mut key = 0;
4543
loop {
4644
let si = SlotIndex::from_index(key);
47-
entries[si.bucket_idx] = si.entries;
45+
entries[si.bucket_idx] = si.entries();
4846
if key == 0 {
4947
key = 1;
5048
} else if key == (1 << 31) {
@@ -56,7 +54,14 @@ const ENTRIES_BY_BUCKET: [usize; 21] = {
5654
entries
5755
};
5856

57+
const BUCKETS: usize = 21;
58+
5959
impl SlotIndex {
60+
/// The total possible number of entries in the bucket
61+
const fn entries(&self) -> usize {
62+
if self.bucket_idx == 0 { 1 << 12 } else { 1 << (self.bucket_idx + 11) }
63+
}
64+
6065
// This unpacks a flat u32 index into identifying which bucket it belongs to and the offset
6166
// within that bucket. As noted in the VecCache docs, buckets double in size with each index.
6267
// Typically that would mean 31 buckets (2^0 + 2^1 ... + 2^31 = u32::MAX - 1), but to reduce
@@ -72,18 +77,16 @@ impl SlotIndex {
7277
Some(x) => x as usize,
7378
None => 0,
7479
};
75-
let entries;
7680
let running_sum;
7781
if bucket <= 11 {
78-
entries = 1 << 12;
7982
running_sum = 0;
8083
bucket = 0;
8184
} else {
82-
entries = 1 << bucket;
85+
let entries = 1 << bucket;
8386
running_sum = entries;
8487
bucket = bucket - 11;
8588
}
86-
SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
89+
SlotIndex { bucket_idx: bucket, index_in_bucket: idx as usize - running_sum }
8790
}
8891

8992
// SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
@@ -98,7 +101,7 @@ impl SlotIndex {
98101
if ptr.is_null() {
99102
return None;
100103
}
101-
assert!(self.index_in_bucket < self.entries);
104+
debug_assert!(self.index_in_bucket < self.entries());
102105
// SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
103106
// must be inbounds.
104107
let slot = unsafe { ptr.add(self.index_in_bucket) };
@@ -126,11 +129,12 @@ impl SlotIndex {
126129

127130
fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
128131
let ptr = bucket.load(Ordering::Acquire);
129-
if ptr.is_null() { self.initialize_bucket(bucket) } else { ptr }
132+
if ptr.is_null() { Self::initialize_bucket(bucket, self.bucket_idx) } else { ptr }
130133
}
131134

132135
#[cold]
133-
fn initialize_bucket<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
136+
#[inline(never)]
137+
fn initialize_bucket<V>(bucket: &AtomicPtr<Slot<V>>, bucket_idx: usize) -> *mut Slot<V> {
134138
static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
135139

136140
// If we are initializing the bucket, then acquire a global lock.
@@ -144,8 +148,8 @@ impl SlotIndex {
144148
// OK, now under the allocator lock, if we're still null then it's definitely us that will
145149
// initialize this bucket.
146150
if ptr.is_null() {
147-
let bucket_layout =
148-
std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
151+
let bucket_len = SlotIndex { bucket_idx, index_in_bucket: 0 }.entries();
152+
let bucket_layout = std::alloc::Layout::array::<Slot<V>>(bucket_len).unwrap();
149153
// This is more of a sanity check -- this code is very cold, so it's safe to pay a
150154
// little extra cost here.
151155
assert!(bucket_layout.size() > 0);
@@ -171,7 +175,7 @@ impl SlotIndex {
171175
let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
172176
let ptr = self.bucket_ptr(bucket);
173177

174-
assert!(self.index_in_bucket < self.entries);
178+
debug_assert!(self.index_in_bucket < self.entries());
175179
// SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
176180
// must be inbounds.
177181
let slot = unsafe { ptr.add(self.index_in_bucket) };
@@ -204,6 +208,31 @@ impl SlotIndex {
204208
Err(_) => false,
205209
}
206210
}
211+
212+
/// Inserts into the map, given that the slot is unique, so it won't race with other threads.
213+
#[inline]
214+
unsafe fn put_unique<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) {
215+
// SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 21, i.e.,
216+
// in-bounds of buckets.
217+
let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
218+
let ptr = self.bucket_ptr(bucket);
219+
220+
debug_assert!(self.index_in_bucket < self.entries());
221+
// SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
222+
// must be inbounds.
223+
let slot = unsafe { ptr.add(self.index_in_bucket) };
224+
225+
// SAFETY: We known our slot is unique as a precondition of this function, so this can't race.
226+
unsafe {
227+
(&raw mut (*slot).value).write(value);
228+
}
229+
230+
// SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
231+
// AtomicU32 access.
232+
let index_and_lock = unsafe { &(*slot).index_and_lock };
233+
234+
index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
235+
}
207236
}
208237

209238
/// In-memory cache for queries whose keys are densely-numbered IDs
@@ -229,11 +258,11 @@ pub struct VecCache<K: Idx, V, I> {
229258
// Bucket 19: 1073741824
230259
// Bucket 20: 2147483648
231260
// The total number of entries if all buckets are initialized is u32::MAX-1.
232-
buckets: [AtomicPtr<Slot<V>>; 21],
261+
buckets: [AtomicPtr<Slot<V>>; BUCKETS],
233262

234263
// In the compiler's current usage these are only *read* during incremental and self-profiling.
235264
// They are an optimization over iterating the full buckets array.
236-
present: [AtomicPtr<Slot<()>>; 21],
265+
present: [AtomicPtr<Slot<()>>; BUCKETS],
237266
len: AtomicUsize,
238267

239268
key: PhantomData<(K, I)>,
@@ -307,9 +336,9 @@ where
307336
let slot_idx = SlotIndex::from_index(key);
308337
if slot_idx.put(&self.buckets, value, index.index() as u32) {
309338
let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
310-
let slot = SlotIndex::from_index(present_idx as u32);
311-
// We should always be uniquely putting due to `len` fetch_add returning unique values.
312-
assert!(slot.put(&self.present, (), key));
339+
let slot = SlotIndex::from_index(u32::try_from(present_idx).unwrap());
340+
// SAFETY: We should always be uniquely putting due to `len` fetch_add returning unique values.
341+
unsafe { slot.put_unique(&self.present, (), key) };
313342
}
314343
}
315344

compiler/rustc_data_structures/src/vec_cache/tests.rs

+13-2
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,13 @@ fn slot_entries_table() {
6868
);
6969
}
7070

71+
#[test]
72+
fn bucket_entries_matches() {
73+
for i in 0..BUCKETS {
74+
assert_eq!(SlotIndex { bucket_idx: i, index_in_bucket: 0 }.entries(), ENTRIES_BY_BUCKET[i]);
75+
}
76+
}
77+
7178
#[test]
7279
#[cfg(not(miri))]
7380
fn slot_index_exhaustive() {
@@ -78,6 +85,10 @@ fn slot_index_exhaustive() {
7885
let mut prev = None::<SlotIndex>;
7986
for idx in 0..=u32::MAX {
8087
let slot_idx = SlotIndex::from_index(idx);
88+
89+
assert!(slot_idx.index_in_bucket < slot_idx.entries());
90+
assert!(slot_idx.bucket_idx < BUCKETS);
91+
8192
if let Some(p) = prev {
8293
if p.bucket_idx == slot_idx.bucket_idx {
8394
assert_eq!(p.index_in_bucket + 1, slot_idx.index_in_bucket);
@@ -90,8 +101,8 @@ fn slot_index_exhaustive() {
90101
assert_eq!(slot_idx.bucket_idx, 0);
91102
}
92103

93-
assert_eq!(buckets[slot_idx.bucket_idx], slot_idx.entries as u32);
94-
assert_eq!(ENTRIES_BY_BUCKET[slot_idx.bucket_idx], slot_idx.entries, "{}", idx);
104+
assert_eq!(buckets[slot_idx.bucket_idx], slot_idx.entries() as u32);
105+
assert_eq!(ENTRIES_BY_BUCKET[slot_idx.bucket_idx], slot_idx.entries(), "{}", idx);
95106

96107
prev = Some(slot_idx);
97108
}

0 commit comments

Comments
 (0)