Make SyncDroplessArena allocations thread-local in the fast path #61873

Status: Closed · wants to merge 2 commits
175 changes: 132 additions & 43 deletions src/libarena/lib.rs
@@ -25,7 +25,7 @@
extern crate alloc;

use rustc_data_structures::cold_path;
use rustc_data_structures::sync::MTLock;
use rustc_data_structures::sync::{SharedWorkerLocal, WorkerLocal, Lock};
use smallvec::SmallVec;

use std::cell::{Cell, RefCell};
@@ -123,11 +123,6 @@ impl<T> Default for TypedArena<T> {
}

impl<T> TypedArena<T> {
pub fn in_arena(&self, ptr: *const T) -> bool {
let ptr = ptr as *const T as *mut T;

self.chunks.borrow().iter().any(|chunk| chunk.start() <= ptr && ptr < chunk.end())
}
/// Allocates an object in the `TypedArena`, returning a reference to it.
#[inline]
pub fn alloc(&self, object: T) -> &mut T {
@@ -378,12 +373,6 @@ impl Default for DroplessArena {
}

impl DroplessArena {
pub fn in_arena<T: ?Sized>(&self, ptr: *const T) -> bool {
let ptr = ptr as *const u8 as *mut u8;

self.chunks.borrow().iter().any(|chunk| chunk.start() <= ptr && ptr < chunk.end())
}

#[inline]
fn align(&self, align: usize) {
let final_address = ((self.ptr.get() as usize) + align - 1) & !(align - 1);
@@ -555,64 +544,164 @@ impl DroplessArena {
}
}

#[derive(Default)]
// FIXME(@Zoxc): this type is entirely unused in rustc
pub struct SyncTypedArena<T> {
lock: MTLock<TypedArena<T>>,
struct CurrentChunk<T> {
/// A pointer to the next object to be allocated.
ptr: Cell<*mut T>,

/// A pointer to the end of the allocated area. When this pointer is
/// reached, a new chunk is allocated.
end: Cell<*mut T>,
}

impl<T> SyncTypedArena<T> {
#[inline(always)]
pub fn alloc(&self, object: T) -> &mut T {
// Extend the lifetime of the result since it's limited to the lock guard
unsafe { &mut *(self.lock.lock().alloc(object) as *mut T) }
impl<T> Default for CurrentChunk<T> {
#[inline]
fn default() -> Self {
CurrentChunk {
// We set both `ptr` and `end` to 0 so that the first call to
// alloc() will trigger a grow().
ptr: Cell::new(0 as *mut T),
end: Cell::new(0 as *mut T),
}
}
}

#[inline(always)]
pub fn alloc_slice(&self, slice: &[T]) -> &mut [T]
where
T: Copy,
{
// Extend the lifetime of the result since it's limited to the lock guard
unsafe { &mut *(self.lock.lock().alloc_slice(slice) as *mut [T]) }
impl<T> CurrentChunk<T> {
#[inline]
fn align(&self, align: usize) {
let final_address = ((self.ptr.get() as usize) + align - 1) & !(align - 1);
self.ptr.set(final_address as *mut T);
assert!(self.ptr <= self.end);
}
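
As an aside, a quick self-contained check of the round-up formula used in `align` above; this snippet is illustrative only and not part of the diff:

    fn main() {
        // Round `addr` up to the next multiple of `align` (a power of two).
        let round_up = |addr: usize, align: usize| (addr + align - 1) & !(align - 1);
        assert_eq!(round_up(13, 8), 16);     // 13 is bumped to the next multiple of 8
        assert_eq!(round_up(16, 8), 16);     // already-aligned addresses are unchanged
        assert_eq!(round_up(1, 4096), 4096); // page-sized alignment behaves the same way
    }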

/// Grows the arena.
#[inline(always)]
pub fn clear(&mut self) {
self.lock.get_mut().clear();
fn grow(&self, n: usize, chunks: &mut Vec<TypedArenaChunk<T>>) {
unsafe {
Contributor: Would be great if these large `unsafe { .. }` blocks would have comments justifying their soundness. (Applies throughout the PR.)

let (chunk, mut new_capacity);
Contributor: move `chunk` declaration to initialization site.

if let Some(last_chunk) = chunks.last_mut() {
Contributor: The statements in the `Some` block of this `if let` deserve some documenting comments.

let used_bytes = self.ptr.get() as usize - last_chunk.start() as usize;
let currently_used_cap = used_bytes / mem::size_of::<T>();
last_chunk.entries = currently_used_cap;
if last_chunk.storage.reserve_in_place(currently_used_cap, n) {
self.end.set(last_chunk.end());
return;
} else {
new_capacity = last_chunk.storage.cap();
loop {
new_capacity = new_capacity.checked_mul(2).unwrap();
if new_capacity >= currently_used_cap + n {
break;
}
}
}
} else {
let elem_size = cmp::max(1, mem::size_of::<T>());
new_capacity = cmp::max(n, PAGE / elem_size);
}
chunk = TypedArenaChunk::<T>::new(new_capacity);
self.ptr.set(chunk.start());
self.end.set(chunk.end());
chunks.push(chunk);
}
}
}
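
Following the reviewer's request above for soundness comments on the `unsafe` blocks, here is one possible shape for such a comment, shown on a small self-contained stand-in (the `MaybeUninit` slot is purely illustrative, not the arena code):

    use std::mem::MaybeUninit;
    use std::ptr;

    fn main() {
        let mut slot = MaybeUninit::<u32>::uninit();
        let mem = slot.as_mut_ptr();
        unsafe {
            // SAFETY: `mem` comes from `slot.as_mut_ptr()`, so it is non-null,
            // properly aligned, and valid for a write of one `u32`; nothing reads
            // the slot before this write initializes it.
            ptr::write(mem, 42);
            assert_eq!(slot.assume_init(), 42);
        }
    }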

#[derive(Default)]
pub struct SyncDroplessArena {
lock: MTLock<DroplessArena>,
/// Pointers to the current chunk
current: WorkerLocal<CurrentChunk<u8>>,

/// A vector of arena chunks.
chunks: Lock<SharedWorkerLocal<Vec<TypedArenaChunk<u8>>>>,
}

impl Default for SyncDroplessArena {
#[inline]
fn default() -> SyncDroplessArena {
SyncDroplessArena {
current: WorkerLocal::new(|_| CurrentChunk::default()),
chunks: Default::default(),
}
}
}

impl SyncDroplessArena {
#[inline(always)]
pub fn in_arena<T: ?Sized>(&self, ptr: *const T) -> bool {
self.lock.lock().in_arena(ptr)
let ptr = ptr as *const u8 as *mut u8;

self.chunks.lock().iter().any(|chunks| chunks.iter().any(|chunk| {
chunk.start() <= ptr && ptr < chunk.end()
}))
}

#[inline(always)]
#[inline(never)]
#[cold]
fn grow(&self, needed_bytes: usize) {
self.current.grow(needed_bytes, &mut **self.chunks.lock());
}

#[inline]
pub fn alloc_raw(&self, bytes: usize, align: usize) -> &mut [u8] {
// Extend the lifetime of the result since it's limited to the lock guard
unsafe { &mut *(self.lock.lock().alloc_raw(bytes, align) as *mut [u8]) }
unsafe {
assert!(bytes != 0);

let current = &*self.current;

current.align(align);

let future_end = intrinsics::arith_offset(current.ptr.get(), bytes as isize);
Contributor: why not use https://doc.rust-lang.org/std/primitive.pointer.html#method.wrapping_offset here? Intrinsics should not be used except in their wrapper code.

Contributor: Alternatively you can use `offset_from` to compute the offset between `end` and `ptr` and see if that is bigger than `bytes`. I think I'd prefer it that way around.

Contributor (author): Oh, we stabilized this intrinsic? That seems unfortunate, as it can be used to create invalid pointers (which are UB in C).

Contributor: It's not stable, and it's still unsafe, so if you think there are problems with it, please raise them on the tracking issue (#41079). `wrapping_offset_from` is safe.

if (future_end as *mut u8) >= current.end.get() {
self.grow(bytes);
}

let ptr = current.ptr.get();
// Set the pointer past ourselves
current.ptr.set(
intrinsics::arith_offset(current.ptr.get(), bytes as isize) as *mut u8,
Contributor: since we just grew the thing, we can now use `ptr::offset`, right?

);
slice::from_raw_parts_mut(ptr, bytes)
}
}
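
A minimal sketch of the alternative the reviewers suggest above: compare the remaining capacity as integers so that no possibly-out-of-bounds pointer is ever formed, then bump the pointer with a safe wrapping method. The `Chunk` type, field names, and buffer below are stand-ins, not the real `CurrentChunk`:

    use std::cell::Cell;

    // Stand-in for the per-thread bump pointers; not the real `CurrentChunk`.
    struct Chunk {
        ptr: Cell<*mut u8>,
        end: Cell<*mut u8>,
    }

    impl Chunk {
        fn try_alloc(&self, bytes: usize) -> Option<*mut u8> {
            // Compare the remaining capacity as integers, so no out-of-bounds
            // pointer is ever formed; `end >= ptr` always holds, so the
            // subtraction cannot underflow.
            let remaining = self.end.get() as usize - self.ptr.get() as usize;
            if remaining < bytes {
                return None; // the slow path (growing the chunk) would go here
            }
            let p = self.ptr.get();
            // `wrapping_add` never dereferences anything, and since
            // `bytes <= remaining` the result stays inside the chunk anyway.
            self.ptr.set(p.wrapping_add(bytes));
            Some(p)
        }
    }

    fn main() {
        let mut buf = [0u8; 64];
        let start = buf.as_mut_ptr();
        let chunk = Chunk {
            ptr: Cell::new(start),
            end: Cell::new(start.wrapping_add(buf.len())),
        };
        assert!(chunk.try_alloc(48).is_some());
        assert!(chunk.try_alloc(32).is_none()); // only 16 bytes remain
    }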

#[inline(always)]
#[inline]
pub fn alloc<T>(&self, object: T) -> &mut T {
// Extend the lifetime of the result since it's limited to the lock guard
unsafe { &mut *(self.lock.lock().alloc(object) as *mut T) }
assert!(!mem::needs_drop::<T>());

let mem = self.alloc_raw(
mem::size_of::<T>(),
mem::align_of::<T>()) as *mut _ as *mut T;

unsafe {
// Write into uninitialized memory.
ptr::write(mem, object);
&mut *mem
}
}

#[inline(always)]
/// Allocates a slice of objects that are copied into the `SyncDroplessArena`, returning a
/// mutable reference to it. Will panic if passed a zero-sized type.
///
/// Panics:
///
/// - Zero-sized types
/// - Zero-length slices
#[inline]
pub fn alloc_slice<T>(&self, slice: &[T]) -> &mut [T]
where
T: Copy,
{
// Extend the lifetime of the result since it's limited to the lock guard
unsafe { &mut *(self.lock.lock().alloc_slice(slice) as *mut [T]) }
assert!(!mem::needs_drop::<T>());
assert!(mem::size_of::<T>() != 0);
assert!(!slice.is_empty());

let mem = self.alloc_raw(
slice.len() * mem::size_of::<T>(),
mem::align_of::<T>()) as *mut _ as *mut T;

unsafe {
let arena_slice = slice::from_raw_parts_mut(mem, slice.len());
arena_slice.copy_from_slice(slice);
arena_slice
}
}
}
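
To summarize the scheme above in a self-contained form: the per-thread `ptr`/`end` pair is bumped without taking any lock, and the shared, locked chunk list is only touched in the cold `grow` path. The sketch below is a single-threaded simplification (one bump pointer instead of one per worker, and a `Mutex<Vec<Box<[u8]>>>` as the backing store); it is illustrative only, not the rustc implementation:

    use std::cell::Cell;
    use std::sync::Mutex;

    const CHUNK: usize = 4096;

    #[derive(Default)]
    struct FastPathArena {
        // One bump pointer here; the real code keeps one `CurrentChunk` per
        // worker thread via `WorkerLocal`, which is what makes the fast path
        // lock-free across threads.
        ptr: Cell<usize>,
        end: Cell<usize>,
        // Shared slow-path state, corresponding to the locked chunk list.
        chunks: Mutex<Vec<Box<[u8]>>>,
    }

    impl FastPathArena {
        fn alloc_raw(&self, bytes: usize) -> *mut u8 {
            assert!(bytes != 0 && bytes <= CHUNK);
            // Fast path: enough room in the current chunk, no lock taken.
            if self.end.get() - self.ptr.get() < bytes {
                self.grow();
            }
            let p = self.ptr.get() as *mut u8;
            self.ptr.set(self.ptr.get() + bytes);
            p
        }

        #[cold]
        fn grow(&self) {
            // Slow path: allocate a new chunk under the lock.
            let mut chunks = self.chunks.lock().unwrap();
            chunks.push(vec![0u8; CHUNK].into_boxed_slice());
            let start = chunks.last_mut().unwrap().as_mut_ptr() as usize;
            self.ptr.set(start);
            self.end.set(start + CHUNK);
        }
    }

    fn main() {
        let arena = FastPathArena::default();
        let a = arena.alloc_raw(16);
        let b = arena.alloc_raw(16);
        // Consecutive allocations are laid out back to back in the same chunk.
        assert_eq!(b as usize - a as usize, 16);
    }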

87 changes: 87 additions & 0 deletions src/librustc_data_structures/sync.rs
@@ -218,6 +218,45 @@ cfg_if! {
}
}

#[derive(Debug, Default)]
pub struct SharedWorkerLocal<T>(T);

impl<T> SharedWorkerLocal<T> {
/// Creates a new worker local where the `initial` closure computes the
/// value this worker local should take for each thread in the thread pool.
#[inline]
pub fn new<F: FnMut(usize) -> T>(mut f: F) -> SharedWorkerLocal<T> {
SharedWorkerLocal(f(0))
}

#[inline]
pub fn iter(&self) -> impl Iterator<Item=&T> {
Some(&self.0).into_iter()
}

/// Returns the worker-local value for each thread
#[inline]
pub fn into_inner(self) -> Vec<T> {
vec![self.0]
}
}

impl<T> Deref for SharedWorkerLocal<T> {
type Target = T;

#[inline(always)]
fn deref(&self) -> &T {
&self.0
}
}

impl<T> DerefMut for SharedWorkerLocal<T> {
#[inline(always)]
fn deref_mut(&mut self) -> &mut T {
&mut self.0
}
}

pub type MTRef<'a, T> = &'a mut T;

#[derive(Debug, Default)]
@@ -337,6 +376,54 @@ cfg_if! {
}

pub use rayon_core::WorkerLocal;
pub use rayon_core::Registry;
use rayon_core::current_thread_index;

#[derive(Debug)]
pub struct SharedWorkerLocal<T>(Vec<T>);

impl<T> SharedWorkerLocal<T> {
/// Creates a new worker local where the `initial` closure computes the
/// value this worker local should take for each thread in the thread pool.
#[inline]
pub fn new<F: FnMut(usize) -> T>(mut f: F) -> SharedWorkerLocal<T> {
SharedWorkerLocal((0..Registry::current_num_threads()).map(|i| f(i)).collect())
}

#[inline]
pub fn iter(&self) -> impl Iterator<Item=&T> {
self.0.iter()
}

/// Returns the worker-local value for each thread
#[inline]
pub fn into_inner(self) -> Vec<T> {
self.0
}
}

impl<T: Default> Default for SharedWorkerLocal<T> {
#[inline]
fn default() -> Self {
SharedWorkerLocal::new(|_| Default::default())
}
}

impl<T> Deref for SharedWorkerLocal<T> {
type Target = T;

#[inline(always)]
fn deref(&self) -> &T {
&self.0[current_thread_index().unwrap()]
}
}

impl<T> DerefMut for SharedWorkerLocal<T> {
#[inline(always)]
fn deref_mut(&mut self) -> &mut T {
&mut self.0[current_thread_index().unwrap()]
}
}

pub use rayon::iter::ParallelIterator;
use rayon::iter::IntoParallelIterator;
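
A simplified model of what `SharedWorkerLocal` provides, assuming an explicit `thread_index` argument in place of `rayon_core::current_thread_index()`; this is a sketch of the idea, not the rayon-backed implementation above:

    struct SharedWorkerLocalModel<T> {
        slots: Vec<T>,
    }

    impl<T> SharedWorkerLocalModel<T> {
        /// One slot per worker thread, initialized from the thread index.
        fn new<F: FnMut(usize) -> T>(num_threads: usize, f: F) -> Self {
            SharedWorkerLocalModel { slots: (0..num_threads).map(f).collect() }
        }

        /// Each worker only ever touches its own slot, so no locking is
        /// needed on this access path.
        fn get(&self, thread_index: usize) -> &T {
            &self.slots[thread_index]
        }

        /// Iterating over every slot is what `SyncDroplessArena::in_arena`
        /// needs: a pointer may live in any worker's chunks.
        fn iter(&self) -> impl Iterator<Item = &T> {
            self.slots.iter()
        }
    }

    fn main() {
        let locals = SharedWorkerLocalModel::new(4, |i| format!("worker {}", i));
        assert_eq!(locals.get(2), "worker 2");
        assert_eq!(locals.iter().count(), 4);
    }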