Skip to content

Commit 13e1402

Browse files
Jeremy Braunfacebook-github-bot
Jeremy Braun
authored andcommitted
Make FrozenThinBoxSlice even thinner
Summary: For short FrozenThingBoxSlice objects, embed the length into the pointer. This is the same optimization in D66519157, except applied to to `FrozenThinBoxSlice`. Reviewed By: JakobDegen Differential Revision: D66773980 fbshipit-source-id: 11ebe5e28dcc4b74e8ee54cb02a55e2e6c842751
1 parent e013ce4 commit 13e1402

File tree

2 files changed

+162
-58
lines changed

2 files changed

+162
-58
lines changed

starlark/src/values/thin_box_slice_frozen_value/packed_impl.rs

+31-7
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use std::ops::Deref;
2121
use std::ptr::NonNull;
2222

2323
use either::Either;
24+
use static_assertions::const_assert;
2425

2526
use crate::values::thin_box_slice_frozen_value::thin_box::AllocatedThinBoxSlice;
2627
use crate::values::FrozenValue;
@@ -35,8 +36,10 @@ struct PackedImpl(NonNull<()>);
3536

3637
impl PackedImpl {
3738
const fn new_allocated(allocated: AllocatedThinBoxSlice<FrozenValue>) -> Self {
38-
let allocated = unsafe { allocated.into_inner().byte_add(1) };
39-
Self(allocated.cast::<()>())
39+
// ensure that there is space for the lower, extra bit
40+
const_assert!(std::mem::align_of::<AllocatedThinBoxSlice<FrozenValue>>() > 1);
41+
let allocated = unsafe { NonNull::new_unchecked((allocated.into_inner() + 1) as *mut ()) };
42+
Self(allocated)
4043
}
4144

4245
fn new(iter: impl IntoIterator<Item = FrozenValue>) -> Self {
@@ -56,11 +59,8 @@ impl PackedImpl {
5659
let ptr = self.0.as_ptr();
5760
if (ptr as usize) & 1 == 1 {
5861
let allocated = (ptr as usize & !1) as *mut FrozenValue;
59-
let allocated = unsafe {
60-
AllocatedThinBoxSlice::<FrozenValue>::from_inner(
61-
NonNull::<FrozenValue>::new_unchecked(allocated),
62-
)
63-
};
62+
let allocated =
63+
unsafe { AllocatedThinBoxSlice::<FrozenValue>::from_inner(allocated as usize) };
6464
Either::Right(allocated)
6565
} else {
6666
let val = unsafe { &*(self as *const PackedImpl as *const FrozenValue) };
@@ -172,6 +172,10 @@ impl<'v> Eq for ThinBoxSliceFrozenValue<'v> {}
172172

173173
#[cfg(test)]
174174
mod tests {
175+
use std::mem;
176+
177+
use super::AllocatedThinBoxSlice;
178+
use super::PackedImpl;
175179
use super::ThinBoxSliceFrozenValue;
176180
use crate::values::int::inline_int::InlineInt;
177181
use crate::values::FrozenHeap;
@@ -223,4 +227,24 @@ mod tests {
223227
let val = ThinBoxSliceFrozenValue::default();
224228
assert_eq!(val.len(), 0);
225229
}
230+
231+
#[test]
232+
fn test_empty() {
233+
let val_a = ThinBoxSliceFrozenValue::empty();
234+
let val_b = ThinBoxSliceFrozenValue::empty();
235+
// Check that the empty value is the same for all empty values so that we're not doing extra allocations
236+
assert_eq!(val_a.0.0.as_ptr(), val_b.0.0.as_ptr());
237+
238+
// Since this and PackedImpl are closely tied together, provide some
239+
// low-level checks that the representations are what we expect.
240+
let val_c = PackedImpl::new([].into_iter());
241+
let val_d = AllocatedThinBoxSlice::<FrozenValue>::empty();
242+
assert_eq!(val_a.0.0.as_ptr(), val_c.0.as_ptr());
243+
assert_eq!(mem::size_of_val(&val_c), std::mem::size_of_val(&val_d));
244+
assert_eq!(mem::size_of_val(&val_c), std::mem::size_of::<usize>());
245+
assert_eq!(1, val_c.0.as_ptr() as usize);
246+
assert_eq!(0, unsafe {
247+
std::mem::transmute::<AllocatedThinBoxSlice<FrozenValue>, usize>(val_d)
248+
});
249+
}
226250
}

starlark/src/values/thin_box_slice_frozen_value/thin_box.rs

+131-51
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use std::alloc::Layout;
2626
use std::fmt::Debug;
2727
use std::hash::Hash;
2828
use std::hash::Hasher;
29+
use std::marker::PhantomData;
2930
use std::mem;
3031
use std::mem::MaybeUninit;
3132
use std::ops::Deref;
@@ -43,18 +44,35 @@ struct ThinBoxSliceLayout<T> {
4344
}
4445

4546
impl<T> ThinBoxSliceLayout<T> {
46-
fn offset_of_data() -> usize {
47-
mem::offset_of!(ThinBoxSliceLayout::<T>, data)
47+
const fn offset_of_data() -> isize {
48+
// SAFETY: rust guarantees no allocated object can be larger than isize::MAX bytes.
49+
mem::offset_of!(ThinBoxSliceLayout::<T>, data) as isize
4850
}
4951
}
5052

51-
/// `Box<[T]>` but thin pointer.
53+
/// `Box<[T]>` but thin pointer to FrozenValue(s)
5254
///
53-
/// Statically allocated for empty slice.
55+
/// Similar to `ThinBoxSlice`, but it ignores the lowest bit, allowing
56+
/// PackedImpl to use that to store a single FrozenValue in place of this
57+
/// object. Like `ThinBoxSlice`, the remaining unused pointer bits are used to
58+
/// store an embedded length. If these bits are zero, the `ptr` points to the
59+
/// `.data` of a ThinBoxSliceLayout, which stores the `.len`. Otherwise, ptr
60+
/// points at the T[].
61+
///
62+
///
63+
/// The current implementation returns what amounts to a null pointer for an
64+
/// empty list. An alternative would be to return a valid pointer to a
65+
/// statically allocated "long"-lengthed object with a length of 0. This would
66+
/// reduce the number of representations, but testing at the time of this
67+
/// writing shows that empty lists are common, and the pointer dereference in
68+
/// reading the length causes a small performance hit. Changes in the future may
69+
/// make this the preferred implementation.
70+
//
5471
// We don't really need `'static` here, but we hit type checker limitations.
5572
pub(super) struct AllocatedThinBoxSlice<T: 'static> {
5673
/// Pointer to the first element, `ThinBoxSliceLayout.data`.
57-
ptr: NonNull<T>,
74+
ptr: usize,
75+
phantom: PhantomData<T>,
5876
}
5977

6078
unsafe impl<T: Sync> Sync for AllocatedThinBoxSlice<T> {}
@@ -63,45 +81,86 @@ unsafe impl<T: Send> Send for AllocatedThinBoxSlice<T> {}
6381
impl<T: 'static> AllocatedThinBoxSlice<T> {
6482
#[inline]
6583
pub(super) const fn empty() -> AllocatedThinBoxSlice<T> {
66-
const fn instance<T>() -> &'static ThinBoxSliceLayout<T> {
67-
assert!(mem::size_of::<ThinBoxSliceLayout<T>>() <= mem::size_of::<u128>());
68-
assert!(mem::align_of::<ThinBoxSliceLayout<T>>() <= mem::align_of::<u128>());
69-
// Instead of just statically allocating a `ThinBoxSliceLayout<T>` we allocate a
70-
// `0_u128`. The reason for this is a rustc bug around const UB checks that otherwise
71-
// incorrectly fires: https://github.com/rust-lang/rust/issues/133523
72-
//
73-
// SAFETY: We just checked that the layout is small enough to fit in a u128.
74-
unsafe { &*ptr::from_ref(&0u128).cast::<ThinBoxSliceLayout<T>>() }
84+
AllocatedThinBoxSlice {
85+
ptr: 0,
86+
phantom: PhantomData,
7587
}
88+
}
7689

77-
unsafe {
78-
AllocatedThinBoxSlice {
79-
ptr: NonNull::new_unchecked(instance::<T>().data.as_ptr() as *mut T),
80-
}
81-
}
90+
const fn get_reserved_tag_bit_count() -> usize {
91+
// The lower bit is reserved for use by PackedImpl.
92+
1
93+
}
94+
95+
const fn get_unshifted_tag_bit_mask() -> usize {
96+
let align: usize = std::mem::align_of::<T>();
97+
assert!(align.is_power_of_two());
98+
align - 1
99+
}
100+
101+
const fn get_tag_bit_mask() -> usize {
102+
let mask = Self::get_unshifted_tag_bit_mask()
103+
>> AllocatedThinBoxSlice::<T>::get_reserved_tag_bit_count();
104+
assert!(mask != 0);
105+
mask
106+
}
107+
108+
const fn get_max_short_len() -> usize {
109+
Self::get_tag_bit_mask() + 1
82110
}
83111

84112
/// Allocation layout for a slice of length `len`.
85113
#[inline]
86-
fn layout_for_len(len: usize) -> Layout {
87-
let (layout, _offset_of_data) = Layout::new::<ThinBoxSliceLayout<T>>()
88-
.extend(Layout::array::<T>(len).unwrap())
89-
.unwrap();
90-
layout
114+
fn layout_for_len(len: usize) -> (bool, Layout) {
115+
if len != 0 && len != 1 && len <= Self::get_max_short_len() {
116+
(true, Layout::array::<T>(len).unwrap())
117+
} else {
118+
let (layout, _offset_of_data) = Layout::new::<ThinBoxSliceLayout<T>>()
119+
.extend(Layout::array::<T>(len).unwrap())
120+
.unwrap();
121+
(false, layout)
122+
}
123+
}
124+
125+
#[inline]
126+
fn get_tag_bits(&self) -> usize {
127+
(self.ptr & Self::get_unshifted_tag_bit_mask()) >> Self::get_reserved_tag_bit_count()
128+
}
129+
130+
#[inline]
131+
fn as_ptr(&self) -> *mut T {
132+
(self.ptr & !Self::get_unshifted_tag_bit_mask()) as *mut T
133+
}
134+
135+
#[inline]
136+
fn as_nonnull_ptr(&self) -> *mut T {
137+
let ptr = self.as_ptr();
138+
if ptr.is_null() {
139+
NonNull::<T>::dangling().as_ptr()
140+
} else {
141+
ptr
142+
}
91143
}
92144

93145
/// Length of the slice.
94146
// Not called `len` to avoid overload with `Deref::len`.
95147
#[inline]
96148
fn read_len(&self) -> usize {
97-
unsafe {
98-
(*self
99-
.ptr
100-
.as_ptr()
101-
.cast::<u8>()
102-
.sub(ThinBoxSliceLayout::<T>::offset_of_data())
103-
.cast::<ThinBoxSliceLayout<T>>())
104-
.len
149+
if self.as_ptr().is_null() {
150+
return 0;
151+
}
152+
153+
let bits = self.get_tag_bits();
154+
if bits != 0 {
155+
bits + 1
156+
} else {
157+
unsafe {
158+
(*self
159+
.as_ptr()
160+
.byte_offset(-ThinBoxSliceLayout::<T>::offset_of_data())
161+
.cast::<ThinBoxSliceLayout<T>>())
162+
.len
163+
}
105164
}
106165
}
107166

@@ -111,26 +170,41 @@ impl<T: 'static> AllocatedThinBoxSlice<T> {
111170
if len == 0 {
112171
AllocatedThinBoxSlice::empty()
113172
} else {
114-
let layout = Self::layout_for_len(len);
173+
let (is_short, layout) = Self::layout_for_len(len);
115174
unsafe {
116175
let alloc = alloc::alloc(layout);
117176
if alloc.is_null() {
118177
alloc::handle_alloc_error(layout);
119178
}
120-
ptr::write(alloc as *mut usize, len);
121-
let ptr = alloc.add(mem::size_of::<usize>()) as *mut MaybeUninit<T>;
122-
let ptr = NonNull::new_unchecked(ptr);
123-
AllocatedThinBoxSlice { ptr }
179+
if is_short {
180+
assert!((alloc as usize) & Self::get_unshifted_tag_bit_mask() == 0);
181+
AllocatedThinBoxSlice {
182+
// Embed the length in the lower bits of ptr
183+
ptr: (alloc as usize) | ((len - 1) << Self::get_reserved_tag_bit_count()),
184+
phantom: PhantomData,
185+
}
186+
} else {
187+
let alloc = alloc as *mut ThinBoxSliceLayout<T>;
188+
(*alloc).len = len;
189+
let data_ptr = alloc.byte_offset(ThinBoxSliceLayout::<T>::offset_of_data());
190+
AllocatedThinBoxSlice {
191+
ptr: data_ptr as usize,
192+
phantom: PhantomData,
193+
}
194+
}
124195
}
125196
}
126197
}
127198

128-
pub const unsafe fn into_inner(self) -> NonNull<T> {
199+
pub const unsafe fn into_inner(self) -> usize {
129200
self.ptr
130201
}
131202

132-
pub const unsafe fn from_inner(ptr: NonNull<T>) -> Self {
133-
Self { ptr }
203+
pub unsafe fn from_inner(ptr: usize) -> Self {
204+
Self {
205+
ptr,
206+
phantom: PhantomData,
207+
}
134208
}
135209
}
136210

@@ -139,22 +213,23 @@ impl<T: 'static> Deref for AllocatedThinBoxSlice<T> {
139213

140214
#[inline]
141215
fn deref(&self) -> &Self::Target {
142-
unsafe { slice::from_raw_parts(self.ptr.as_ptr(), self.read_len()) }
216+
unsafe { slice::from_raw_parts(self.as_nonnull_ptr(), self.read_len()) }
143217
}
144218
}
145219

146220
impl<T: 'static> DerefMut for AllocatedThinBoxSlice<T> {
147221
#[inline]
148222
fn deref_mut(&mut self) -> &mut Self::Target {
149-
unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), self.read_len()) }
223+
unsafe { slice::from_raw_parts_mut(self.as_nonnull_ptr(), self.read_len()) }
150224
}
151225
}
152226

153227
impl<T> AllocatedThinBoxSlice<MaybeUninit<T>> {
154228
#[inline]
155229
unsafe fn assume_init(self) -> AllocatedThinBoxSlice<T> {
156230
AllocatedThinBoxSlice {
157-
ptr: self.ptr.cast::<T>(),
231+
ptr: self.ptr,
232+
phantom: PhantomData,
158233
}
159234
}
160235
}
@@ -165,10 +240,14 @@ impl<T: 'static> AllocatedThinBoxSlice<T> {
165240
unsafe {
166241
let len = self.read_len();
167242
if len != 0 {
168-
let slice = ptr::slice_from_raw_parts_mut(self.ptr.as_ptr(), len);
243+
let slice = ptr::slice_from_raw_parts_mut(self.as_nonnull_ptr(), len);
169244
ptr::drop_in_place(slice);
170-
let alloc = self.ptr.cast::<usize>().as_ptr().sub(1);
171-
alloc::dealloc(alloc as *mut u8, Self::layout_for_len(len));
245+
let mut alloc = self.as_ptr().cast::<u8>();
246+
let (is_short, layout) = Self::layout_for_len(len);
247+
if !is_short {
248+
alloc = alloc.byte_offset(-ThinBoxSliceLayout::<T>::offset_of_data());
249+
}
250+
alloc::dealloc(alloc, layout);
172251
}
173252
}
174253
}
@@ -246,11 +325,12 @@ impl<T: Allocative> Allocative for AllocatedThinBoxSlice<T> {
246325
let mut visitor =
247326
visitor.enter_unique(allocative::Key::new("ptr"), mem::size_of_val(&self.ptr));
248327
{
249-
let mut visitor = visitor.enter(
250-
allocative::Key::new("alloc"),
251-
Self::layout_for_len(self.len()).size(),
252-
);
253-
visitor.visit_simple(allocative::Key::new("len"), mem::size_of::<usize>());
328+
let (is_short, layout) = Self::layout_for_len(self.len());
329+
let mut visitor = visitor.enter(allocative::Key::new("alloc"), layout.size());
330+
331+
if !is_short {
332+
visitor.visit_simple(allocative::Key::new("len"), mem::size_of::<usize>());
333+
}
254334
{
255335
let mut visitor = visitor
256336
.enter(allocative::Key::new("data"), mem::size_of_val::<[_]>(self));

0 commit comments

Comments
 (0)