Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tweak VecCache to improve performance #138405

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 47 additions & 18 deletions compiler/rustc_data_structures/src/vec_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ struct Slot<V> {
struct SlotIndex {
// the index of the bucket in VecCache (0 to 20)
bucket_idx: usize,
// number of entries in that bucket
entries: usize,
// the index of the slot within the bucket
index_in_bucket: usize,
}
Expand All @@ -44,7 +42,7 @@ const ENTRIES_BY_BUCKET: [usize; 21] = {
let mut key = 0;
loop {
let si = SlotIndex::from_index(key);
entries[si.bucket_idx] = si.entries;
entries[si.bucket_idx] = si.entries();
if key == 0 {
key = 1;
} else if key == (1 << 31) {
Expand All @@ -56,7 +54,14 @@ const ENTRIES_BY_BUCKET: [usize; 21] = {
entries
};

/// The number of buckets a `VecCache` is divided into (bucket indices `0..=20`).
const BUCKETS: usize = 21;

impl SlotIndex {
/// Total capacity (number of slots) of the bucket this index points into.
///
/// Bucket 0 is over-sized to hold the first `2^12` entries; every later
/// bucket `n` holds `2^(n + 11)` entries, doubling each time.
const fn entries(&self) -> usize {
    match self.bucket_idx {
        0 => 1 << 12,
        n => 1 << (n + 11),
    }
}

// This unpacks a flat u32 index into identifying which bucket it belongs to and the offset
// within that bucket. As noted in the VecCache docs, buckets double in size with each index.
// Typically that would mean 31 buckets (2^0 + 2^1 ... + 2^31 = u32::MAX - 1), but to reduce
Expand All @@ -72,18 +77,16 @@ impl SlotIndex {
Some(x) => x as usize,
None => 0,
};
let entries;
let running_sum;
if bucket <= 11 {
entries = 1 << 12;
running_sum = 0;
bucket = 0;
} else {
entries = 1 << bucket;
let entries = 1 << bucket;
running_sum = entries;
bucket = bucket - 11;
}
SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
SlotIndex { bucket_idx: bucket, index_in_bucket: idx as usize - running_sum }
}

// SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
Expand All @@ -98,7 +101,7 @@ impl SlotIndex {
if ptr.is_null() {
return None;
}
assert!(self.index_in_bucket < self.entries);
debug_assert!(self.index_in_bucket < self.entries());
// SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
// must be inbounds.
let slot = unsafe { ptr.add(self.index_in_bucket) };
Expand Down Expand Up @@ -126,11 +129,12 @@ impl SlotIndex {

/// Returns the pointer to this index's bucket, allocating the bucket on
/// first use.
fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
    let loaded = bucket.load(Ordering::Acquire);
    if !loaded.is_null() {
        return loaded;
    }
    // First touch of this bucket: take the slow, cold allocation path.
    Self::initialize_bucket(bucket, self.bucket_idx)
}

#[cold]
fn initialize_bucket<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
#[inline(never)]
fn initialize_bucket<V>(bucket: &AtomicPtr<Slot<V>>, bucket_idx: usize) -> *mut Slot<V> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It avoids `Self` needing to exist when the function is not inlined, as the needed information can be passed in registers instead.

static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

// If we are initializing the bucket, then acquire a global lock.
Expand All @@ -144,8 +148,8 @@ impl SlotIndex {
// OK, now under the allocator lock, if we're still null then it's definitely us that will
// initialize this bucket.
if ptr.is_null() {
let bucket_layout =
std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
let bucket_len = SlotIndex { bucket_idx, index_in_bucket: 0 }.entries();
let bucket_layout = std::alloc::Layout::array::<Slot<V>>(bucket_len).unwrap();
// This is more of a sanity check -- this code is very cold, so it's safe to pay a
// little extra cost here.
assert!(bucket_layout.size() > 0);
Expand All @@ -171,7 +175,7 @@ impl SlotIndex {
let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
let ptr = self.bucket_ptr(bucket);

assert!(self.index_in_bucket < self.entries);
debug_assert!(self.index_in_bucket < self.entries());
// SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
// must be inbounds.
let slot = unsafe { ptr.add(self.index_in_bucket) };
Expand Down Expand Up @@ -204,6 +208,31 @@ impl SlotIndex {
Err(_) => false,
}
}

/// Inserts into the map, given that the slot is unique, so it won't race with other threads.
///
/// # Safety
///
/// The caller must guarantee that no other thread can concurrently write to the slot
/// identified by `self` (i.e., this (bucket_idx, index_in_bucket) is handed out to exactly
/// one writer), otherwise the non-atomic `value` write below is a data race.
#[inline]
unsafe fn put_unique<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) {
// SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
// in-bounds of buckets.
let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
let ptr = self.bucket_ptr(bucket);

debug_assert!(self.index_in_bucket < self.entries());
// SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
// must be inbounds.
let slot = unsafe { ptr.add(self.index_in_bucket) };

// SAFETY: We know our slot is unique as a precondition of this function, so this can't race.
unsafe {
(&raw mut (*slot).value).write(value);
}

// SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
// AtomicU32 access.
let index_and_lock = unsafe { &(*slot).index_and_lock };

// Publish `value` with Release so readers that observe the index also observe the write.
// NOTE(review): the +2 presumably skips reserved index_and_lock states (0 = empty,
// 1 = locked/in-progress) — confirm against the corresponding `get`/`put` logic.
index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
}
}

/// In-memory cache for queries whose keys are densely-numbered IDs
Expand All @@ -229,11 +258,11 @@ pub struct VecCache<K: Idx, V, I> {
// Bucket 19: 1073741824
// Bucket 20: 2147483648
// The total number of entries if all buckets are initialized is u32::MAX-1.
buckets: [AtomicPtr<Slot<V>>; 21],
buckets: [AtomicPtr<Slot<V>>; BUCKETS],

// In the compiler's current usage these are only *read* during incremental and self-profiling.
// They are an optimization over iterating the full buckets array.
present: [AtomicPtr<Slot<()>>; 21],
present: [AtomicPtr<Slot<()>>; BUCKETS],
len: AtomicUsize,

key: PhantomData<(K, I)>,
Expand Down Expand Up @@ -307,9 +336,9 @@ where
let slot_idx = SlotIndex::from_index(key);
if slot_idx.put(&self.buckets, value, index.index() as u32) {
let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
let slot = SlotIndex::from_index(present_idx as u32);
// We should always be uniquely putting due to `len` fetch_add returning unique values.
assert!(slot.put(&self.present, (), key));
let slot = SlotIndex::from_index(u32::try_from(present_idx).unwrap());
// SAFETY: We should always be uniquely putting due to `len` fetch_add returning unique values.
unsafe { slot.put_unique(&self.present, (), key) };
}
}

Expand Down
15 changes: 13 additions & 2 deletions compiler/rustc_data_structures/src/vec_cache/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@ fn slot_entries_table() {
);
}

#[test]
fn bucket_entries_matches() {
    // The precomputed ENTRIES_BY_BUCKET table must agree with
    // `SlotIndex::entries()` for every bucket index.
    for (bucket_idx, &expected) in ENTRIES_BY_BUCKET.iter().enumerate() {
        let slot = SlotIndex { bucket_idx, index_in_bucket: 0 };
        assert_eq!(slot.entries(), expected);
    }
}

#[test]
#[cfg(not(miri))]
fn slot_index_exhaustive() {
Expand All @@ -78,6 +85,10 @@ fn slot_index_exhaustive() {
let mut prev = None::<SlotIndex>;
for idx in 0..=u32::MAX {
let slot_idx = SlotIndex::from_index(idx);

assert!(slot_idx.index_in_bucket < slot_idx.entries());
assert!(slot_idx.bucket_idx < BUCKETS);

if let Some(p) = prev {
if p.bucket_idx == slot_idx.bucket_idx {
assert_eq!(p.index_in_bucket + 1, slot_idx.index_in_bucket);
Expand All @@ -90,8 +101,8 @@ fn slot_index_exhaustive() {
assert_eq!(slot_idx.bucket_idx, 0);
}

assert_eq!(buckets[slot_idx.bucket_idx], slot_idx.entries as u32);
assert_eq!(ENTRIES_BY_BUCKET[slot_idx.bucket_idx], slot_idx.entries, "{}", idx);
assert_eq!(buckets[slot_idx.bucket_idx], slot_idx.entries() as u32);
assert_eq!(ENTRIES_BY_BUCKET[slot_idx.bucket_idx], slot_idx.entries(), "{}", idx);

prev = Some(slot_idx);
}
Expand Down
Loading