Skip to content

Commit ab40a7c

Browse files
committed
Auto merge of #43815 - alexcrichton:optimize-alloc, r=sfackler
Optimize allocation paths in RawVec. Since the `Alloc` trait was introduced (#42313) and integrated everywhere (#42727), there have been some slowdowns and regressions that slipped through. The intention of this PR is to tackle at least some of them, but they've been very difficult to quantify up to this point, so it probably doesn't solve everything. This PR primarily targets the `RawVec` type, specifically the `double` function. The codegen for this function is now much closer to what it was before #42313 landed, as many runtime checks have been elided.
2 parents a80a873 + 3a83165 commit ab40a7c

File tree

3 files changed

+133
-76
lines changed

3 files changed

+133
-76
lines changed

src/liballoc/allocator.rs

+4
Original file line numberDiff line numberDiff line change
@@ -354,15 +354,19 @@ pub enum AllocErr {
354354
}
355355

356356
impl AllocErr {
357+
#[inline]
357358
pub fn invalid_input(details: &'static str) -> Self {
358359
AllocErr::Unsupported { details: details }
359360
}
361+
#[inline]
360362
pub fn is_memory_exhausted(&self) -> bool {
361363
if let AllocErr::Exhausted { .. } = *self { true } else { false }
362364
}
365+
#[inline]
363366
pub fn is_request_unsupported(&self) -> bool {
364367
if let AllocErr::Unsupported { .. } = *self { true } else { false }
365368
}
369+
#[inline]
366370
pub fn description(&self) -> &str {
367371
match *self {
368372
AllocErr::Exhausted { .. } => "allocator memory exhausted",

src/liballoc/heap.rs

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub mod __core {
2828
extern "Rust" {
2929
#[allocator]
3030
fn __rust_alloc(size: usize, align: usize, err: *mut u8) -> *mut u8;
31+
#[cold]
3132
fn __rust_oom(err: *const u8) -> !;
3233
fn __rust_dealloc(ptr: *mut u8, size: usize, align: usize);
3334
fn __rust_usable_size(layout: *const u8,
@@ -81,6 +82,7 @@ unsafe impl Alloc for Heap {
8182
}
8283

8384
#[inline]
85+
#[cold]
8486
fn oom(&mut self, err: AllocErr) -> ! {
8587
unsafe {
8688
__rust_oom(&err as *const AllocErr as *const u8)

src/liballoc/raw_vec.rs

+127-76
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,13 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use allocator::{Alloc, Layout};
12-
use core::ptr::{self, Unique};
11+
use core::cmp;
1312
use core::mem;
13+
use core::ops::Drop;
14+
use core::ptr::{self, Unique};
1415
use core::slice;
15-
use heap::Heap;
16+
use heap::{Alloc, Layout, Heap};
1617
use super::boxed::Box;
17-
use core::ops::Drop;
18-
use core::cmp;
1918

2019
/// A low-level utility for more ergonomically allocating, reallocating, and deallocating
2120
/// a buffer of memory on the heap without having to worry about all the corner cases
@@ -222,6 +221,20 @@ impl<T, A: Alloc> RawVec<T, A> {
222221
&mut self.a
223222
}
224223

224+
fn current_layout(&self) -> Option<Layout> {
225+
if self.cap == 0 {
226+
None
227+
} else {
228+
// We have an allocated chunk of memory, so we can bypass runtime
229+
// checks to get our current layout.
230+
unsafe {
231+
let align = mem::align_of::<T>();
232+
let size = mem::size_of::<T>() * self.cap;
233+
Some(Layout::from_size_align_unchecked(size, align))
234+
}
235+
}
236+
}
237+
225238
/// Doubles the size of the type's backing allocation. This is common enough
226239
/// to want to do that it's easiest to just have a dedicated method. Slightly
227240
/// more efficient logic can be provided for this than the general case.
@@ -280,27 +293,40 @@ impl<T, A: Alloc> RawVec<T, A> {
280293
// 0, getting to here necessarily means the RawVec is overfull.
281294
assert!(elem_size != 0, "capacity overflow");
282295

283-
let (new_cap, ptr_res) = if self.cap == 0 {
284-
// skip to 4 because tiny Vec's are dumb; but not if that would cause overflow
285-
let new_cap = if elem_size > (!0) / 8 { 1 } else { 4 };
286-
let ptr_res = self.a.alloc_array::<T>(new_cap);
287-
(new_cap, ptr_res)
288-
} else {
289-
// Since we guarantee that we never allocate more than isize::MAX bytes,
290-
// `elem_size * self.cap <= isize::MAX` as a precondition, so this can't overflow
291-
let new_cap = 2 * self.cap;
292-
let new_alloc_size = new_cap * elem_size;
293-
alloc_guard(new_alloc_size);
294-
let ptr_res = self.a.realloc_array(self.ptr, self.cap, new_cap);
295-
(new_cap, ptr_res)
296-
};
297-
298-
// If allocate or reallocate fail, we'll get `null` back
299-
let uniq = match ptr_res {
300-
Err(err) => self.a.oom(err),
301-
Ok(uniq) => uniq,
296+
let (new_cap, uniq) = match self.current_layout() {
297+
Some(cur) => {
298+
// Since we guarantee that we never allocate more than
299+
// isize::MAX bytes, `elem_size * self.cap <= isize::MAX` as
300+
// a precondition, so this can't overflow. Additionally the
301+
// alignment will never be too large as to "not be
302+
// satisfiable", so `Layout::from_size_align` will always
303+
// return `Some`.
304+
//
305+
// tl;dr; we bypass runtime checks due to dynamic assertions
306+
// in this module, allowing us to use
307+
// `from_size_align_unchecked`.
308+
let new_cap = 2 * self.cap;
309+
let new_size = new_cap * elem_size;
310+
let new_layout = Layout::from_size_align_unchecked(new_size, cur.align());
311+
alloc_guard(new_size);
312+
let ptr_res = self.a.realloc(self.ptr.as_ptr() as *mut u8,
313+
cur,
314+
new_layout);
315+
match ptr_res {
316+
Ok(ptr) => (new_cap, Unique::new_unchecked(ptr as *mut T)),
317+
Err(e) => self.a.oom(e),
318+
}
319+
}
320+
None => {
321+
// skip to 4 because tiny Vec's are dumb; but not if that
322+
// would cause overflow
323+
let new_cap = if elem_size > (!0) / 8 { 1 } else { 4 };
324+
match self.a.alloc_array::<T>(new_cap) {
325+
Ok(ptr) => (new_cap, ptr),
326+
Err(e) => self.a.oom(e),
327+
}
328+
}
302329
};
303-
304330
self.ptr = uniq;
305331
self.cap = new_cap;
306332
}
@@ -323,21 +349,27 @@ impl<T, A: Alloc> RawVec<T, A> {
323349
pub fn double_in_place(&mut self) -> bool {
324350
unsafe {
325351
let elem_size = mem::size_of::<T>();
352+
let old_layout = match self.current_layout() {
353+
Some(layout) => layout,
354+
None => return false, // nothing to double
355+
};
326356

327357
// since we set the capacity to usize::MAX when elem_size is
328358
// 0, getting to here necessarily means the RawVec is overfull.
329359
assert!(elem_size != 0, "capacity overflow");
330360

331-
// Since we guarantee that we never allocate more than isize::MAX bytes,
332-
// `elem_size * self.cap <= isize::MAX` as a precondition, so this can't overflow
361+
// Since we guarantee that we never allocate more than isize::MAX
362+
// bytes, `elem_size * self.cap <= isize::MAX` as a precondition, so
363+
// this can't overflow.
364+
//
365+
// As with `double` above, we can go straight to
366+
// `Layout::from_size_align_unchecked` as we know this won't
367+
// overflow and the alignment is sufficiently small.
333368
let new_cap = 2 * self.cap;
334-
let new_alloc_size = new_cap * elem_size;
335-
336-
alloc_guard(new_alloc_size);
337-
369+
let new_size = new_cap * elem_size;
370+
alloc_guard(new_size);
338371
let ptr = self.ptr() as *mut _;
339-
let old_layout = Layout::new::<T>().repeat(self.cap).unwrap().0;
340-
let new_layout = Layout::new::<T>().repeat(new_cap).unwrap().0;
372+
let new_layout = Layout::from_size_align_unchecked(new_size, old_layout.align());
341373
match self.a.grow_in_place(ptr, old_layout, new_layout) {
342374
Ok(_) => {
343375
// We can't directly divide `size`.
@@ -373,8 +405,6 @@ impl<T, A: Alloc> RawVec<T, A> {
373405
/// Aborts on OOM
374406
pub fn reserve_exact(&mut self, used_cap: usize, needed_extra_cap: usize) {
375407
unsafe {
376-
let elem_size = mem::size_of::<T>();
377-
378408
// NOTE: we don't early branch on ZSTs here because we want this
379409
// to actually catch "asking for more than usize::MAX" in that case.
380410
// If we make it past the first branch then we are guaranteed to
@@ -388,21 +418,22 @@ impl<T, A: Alloc> RawVec<T, A> {
388418

389419
// Nothing we can really do about these checks :(
390420
let new_cap = used_cap.checked_add(needed_extra_cap).expect("capacity overflow");
391-
let new_alloc_size = new_cap.checked_mul(elem_size).expect("capacity overflow");
392-
alloc_guard(new_alloc_size);
393-
394-
let result = if self.cap == 0 {
395-
self.a.alloc_array::<T>(new_cap)
396-
} else {
397-
self.a.realloc_array(self.ptr, self.cap, new_cap)
421+
let new_layout = match Layout::array::<T>(new_cap) {
422+
Some(layout) => layout,
423+
None => panic!("capacity overflow"),
398424
};
399-
400-
// If allocate or reallocate fail, we'll get `null` back
401-
let uniq = match result {
402-
Err(err) => self.a.oom(err),
403-
Ok(uniq) => uniq,
425+
alloc_guard(new_layout.size());
426+
let res = match self.current_layout() {
427+
Some(layout) => {
428+
let old_ptr = self.ptr.as_ptr() as *mut u8;
429+
self.a.realloc(old_ptr, layout, new_layout)
430+
}
431+
None => self.a.alloc(new_layout),
432+
};
433+
let uniq = match res {
434+
Ok(ptr) => Unique::new_unchecked(ptr as *mut T),
435+
Err(e) => self.a.oom(e),
404436
};
405-
406437
self.ptr = uniq;
407438
self.cap = new_cap;
408439
}
@@ -411,17 +442,14 @@ impl<T, A: Alloc> RawVec<T, A> {
411442
/// Calculates the buffer's new size given that it'll hold `used_cap +
412443
/// needed_extra_cap` elements. This logic is used in amortized reserve methods.
413444
/// Returns the new capacity.
414-
fn amortized_new_size(&self, used_cap: usize, needed_extra_cap: usize) -> (usize, usize) {
415-
let elem_size = mem::size_of::<T>();
445+
fn amortized_new_size(&self, used_cap: usize, needed_extra_cap: usize) -> usize {
416446
// Nothing we can really do about these checks :(
417447
let required_cap = used_cap.checked_add(needed_extra_cap)
418448
.expect("capacity overflow");
419449
// Cannot overflow, because `cap <= isize::MAX`, and type of `cap` is `usize`.
420450
let double_cap = self.cap * 2;
421451
// `double_cap` guarantees exponential growth.
422-
let new_cap = cmp::max(double_cap, required_cap);
423-
let new_alloc_size = new_cap.checked_mul(elem_size).expect("capacity overflow");
424-
(new_cap, new_alloc_size)
452+
cmp::max(double_cap, required_cap)
425453
}
426454

427455
/// Ensures that the buffer contains at least enough space to hold
@@ -489,21 +517,25 @@ impl<T, A: Alloc> RawVec<T, A> {
489517
return;
490518
}
491519

492-
let (new_cap, new_alloc_size) = self.amortized_new_size(used_cap, needed_extra_cap);
493-
// FIXME: may crash and burn on over-reserve
494-
alloc_guard(new_alloc_size);
520+
let new_cap = self.amortized_new_size(used_cap, needed_extra_cap);
495521

496-
let result = if self.cap == 0 {
497-
self.a.alloc_array::<T>(new_cap)
498-
} else {
499-
self.a.realloc_array(self.ptr, self.cap, new_cap)
522+
let new_layout = match Layout::array::<T>(new_cap) {
523+
Some(layout) => layout,
524+
None => panic!("capacity overflow"),
500525
};
501-
502-
let uniq = match result {
503-
Err(err) => self.a.oom(err),
504-
Ok(uniq) => uniq,
526+
// FIXME: may crash and burn on over-reserve
527+
alloc_guard(new_layout.size());
528+
let res = match self.current_layout() {
529+
Some(layout) => {
530+
let old_ptr = self.ptr.as_ptr() as *mut u8;
531+
self.a.realloc(old_ptr, layout, new_layout)
532+
}
533+
None => self.a.alloc(new_layout),
534+
};
535+
let uniq = match res {
536+
Ok(ptr) => Unique::new_unchecked(ptr as *mut T),
537+
Err(e) => self.a.oom(e),
505538
};
506-
507539
self.ptr = uniq;
508540
self.cap = new_cap;
509541
}
@@ -536,21 +568,24 @@ impl<T, A: Alloc> RawVec<T, A> {
536568
// Don't actually need any more capacity. If the current `cap` is 0, we can't
537569
// reallocate in place.
538570
// Wrapping in case they give a bad `used_cap`
539-
if self.cap().wrapping_sub(used_cap) >= needed_extra_cap || self.cap == 0 {
571+
let old_layout = match self.current_layout() {
572+
Some(layout) => layout,
573+
None => return false,
574+
};
575+
if self.cap().wrapping_sub(used_cap) >= needed_extra_cap {
540576
return false;
541577
}
542578

543-
let (new_cap, new_alloc_size) = self.amortized_new_size(used_cap, needed_extra_cap);
544-
// FIXME: may crash and burn on over-reserve
545-
alloc_guard(new_alloc_size);
579+
let new_cap = self.amortized_new_size(used_cap, needed_extra_cap);
546580

547581
// Here, `cap < used_cap + needed_extra_cap <= new_cap`
548582
// (regardless of whether `self.cap - used_cap` wrapped).
549583
// Therefore we can safely call grow_in_place.
550584

551585
let ptr = self.ptr() as *mut _;
552-
let old_layout = Layout::new::<T>().repeat(self.cap).unwrap().0;
553586
let new_layout = Layout::new::<T>().repeat(new_cap).unwrap().0;
587+
// FIXME: may crash and burn on over-reserve
588+
alloc_guard(new_layout.size());
554589
match self.a.grow_in_place(ptr, old_layout, new_layout) {
555590
Ok(_) => {
556591
self.cap = new_cap;
@@ -599,9 +634,24 @@ impl<T, A: Alloc> RawVec<T, A> {
599634
}
600635
} else if self.cap != amount {
601636
unsafe {
602-
match self.a.realloc_array(self.ptr, self.cap, amount) {
637+
// We know here that our `amount` is greater than zero. This
638+
// implies, via the assert above, that capacity is also greater
639+
// than zero, which means that we've got a current layout that
640+
// "fits"
641+
//
642+
// We also know that `self.cap` is greater than `amount`, and
643+
// consequently we don't need runtime checks for creating either
644+
// layout
645+
let old_size = elem_size * self.cap;
646+
let new_size = elem_size * amount;
647+
let align = mem::align_of::<T>();
648+
let old_layout = Layout::from_size_align_unchecked(old_size, align);
649+
let new_layout = Layout::from_size_align_unchecked(new_size, align);
650+
match self.a.realloc(self.ptr.as_ptr() as *mut u8,
651+
old_layout,
652+
new_layout) {
653+
Ok(p) => self.ptr = Unique::new_unchecked(p as *mut T),
603654
Err(err) => self.a.oom(err),
604-
Ok(uniq) => self.ptr = uniq,
605655
}
606656
}
607657
self.cap = amount;
@@ -631,10 +681,11 @@ impl<T, A: Alloc> RawVec<T, A> {
631681
/// Frees the memory owned by the RawVec *without* trying to Drop its contents.
632682
pub unsafe fn dealloc_buffer(&mut self) {
633683
let elem_size = mem::size_of::<T>();
634-
if elem_size != 0 && self.cap != 0 {
635-
let ptr = self.ptr() as *mut u8;
636-
let layout = Layout::new::<T>().repeat(self.cap).unwrap().0;
637-
self.a.dealloc(ptr, layout);
684+
if elem_size != 0 {
685+
if let Some(layout) = self.current_layout() {
686+
let ptr = self.ptr() as *mut u8;
687+
self.a.dealloc(ptr, layout);
688+
}
638689
}
639690
}
640691
}

0 commit comments

Comments
 (0)