Skip to content

Commit a9cb27d

Browse files
authoredMar 22, 2023
Rollup merge of rust-lang#109179 - llogiq:intrinsically-option-as-slice, r=eholk
move Option::as_slice to intrinsic ```@scottmcm``` suggested on rust-lang#109095 I use a direct approach of unpacking the operation in MIR lowering, so here's the implementation. cc ```@nikic``` as this should hopefully unblock rust-lang#107224 (though perhaps other changes to the prior implementation, which I left for bootstrapping, are needed).
2 parents 68864e2 + 27e9ee9 commit a9cb27d

File tree

8 files changed

+152
-72
lines changed

8 files changed

+152
-72
lines changed
 

‎compiler/rustc_hir/src/lang_items.rs

+1
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ language_item_table! {
301301
Context, sym::Context, context, Target::Struct, GenericRequirement::None;
302302
FuturePoll, sym::poll, future_poll_fn, Target::Method(MethodKind::Trait { body: false }), GenericRequirement::None;
303303

304+
Option, sym::Option, option_type, Target::Enum, GenericRequirement::None;
304305
OptionSome, sym::Some, option_some_variant, Target::Variant, GenericRequirement::None;
305306
OptionNone, sym::None, option_none_variant, Target::Variant, GenericRequirement::None;
306307

‎compiler/rustc_hir_analysis/src/check/intrinsic.rs

+15
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,21 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
223223
],
224224
tcx.mk_ptr(ty::TypeAndMut { ty: param(0), mutbl: hir::Mutability::Not }),
225225
),
226+
sym::option_payload_ptr => {
227+
let option_def_id = tcx.require_lang_item(hir::LangItem::Option, None);
228+
let p0 = param(0);
229+
(
230+
1,
231+
vec![tcx.mk_ptr(ty::TypeAndMut {
232+
ty: tcx.mk_adt(
233+
tcx.adt_def(option_def_id),
234+
tcx.mk_substs_from_iter([ty::GenericArg::from(p0)].into_iter()),
235+
),
236+
mutbl: hir::Mutability::Not,
237+
})],
238+
tcx.mk_ptr(ty::TypeAndMut { ty: p0, mutbl: hir::Mutability::Not }),
239+
)
240+
}
226241
sym::ptr_mask => (
227242
1,
228243
vec![

‎compiler/rustc_mir_transform/src/lower_intrinsics.rs

+30
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use rustc_middle::ty::subst::SubstsRef;
66
use rustc_middle::ty::{self, Ty, TyCtxt};
77
use rustc_span::symbol::{sym, Symbol};
88
use rustc_span::Span;
9+
use rustc_target::abi::VariantIdx;
910

1011
pub struct LowerIntrinsics;
1112

@@ -191,6 +192,35 @@ impl<'tcx> MirPass<'tcx> for LowerIntrinsics {
191192
terminator.kind = TerminatorKind::Goto { target };
192193
}
193194
}
195+
sym::option_payload_ptr => {
196+
if let (Some(target), Some(arg)) = (*target, args[0].place()) {
197+
let ty::RawPtr(ty::TypeAndMut { ty: dest_ty, .. }) =
198+
destination.ty(local_decls, tcx).ty.kind()
199+
else { bug!(); };
200+
201+
block.statements.push(Statement {
202+
source_info: terminator.source_info,
203+
kind: StatementKind::Assign(Box::new((
204+
*destination,
205+
Rvalue::AddressOf(
206+
Mutability::Not,
207+
arg.project_deeper(
208+
&[
209+
PlaceElem::Deref,
210+
PlaceElem::Downcast(
211+
Some(sym::Some),
212+
VariantIdx::from_u32(1),
213+
),
214+
PlaceElem::Field(Field::from_u32(0), *dest_ty),
215+
],
216+
tcx,
217+
),
218+
),
219+
))),
220+
});
221+
terminator.kind = TerminatorKind::Goto { target };
222+
}
223+
}
194224
_ if intrinsic_name.as_str().starts_with("simd_shuffle") => {
195225
validate_simd_shuffle(tcx, args, terminator.source_info.span);
196226
}

‎compiler/rustc_span/src/symbol.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1044,6 +1044,7 @@ symbols! {
10441044
optin_builtin_traits,
10451045
option,
10461046
option_env,
1047+
option_payload_ptr,
10471048
options,
10481049
or,
10491050
or_patterns,

‎library/core/src/intrinsics.rs

+6
Original file line numberDiff line numberDiff line change
@@ -2214,6 +2214,12 @@ extern "rust-intrinsic" {
22142214
where
22152215
G: FnOnce<ARG, Output = RET>,
22162216
F: FnOnce<ARG, Output = RET>;
2217+
2218+
#[cfg(not(bootstrap))]
2219+
/// This method creates a pointer to any `Some` value. If the argument is
2220+
/// `None`, an invalid within-bounds pointer (that is still acceptable for
2221+
/// constructing an empty slice) is returned.
2222+
pub fn option_payload_ptr<T>(arg: *const Option<T>) -> *const T;
22172223
}
22182224

22192225
// Some functions are defined here because they accidentally got made

‎library/core/src/option.rs

+36-72
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,7 @@ use crate::{
559559
/// The `Option` type. See [the module level documentation](self) for more.
560560
#[derive(Copy, PartialOrd, Eq, Ord, Debug, Hash)]
561561
#[rustc_diagnostic_item = "Option"]
562+
#[cfg_attr(not(bootstrap), lang = "Option")]
562563
#[stable(feature = "rust1", since = "1.0.0")]
563564
pub enum Option<T> {
564565
/// No value.
@@ -735,48 +736,6 @@ impl<T> Option<T> {
735736
}
736737
}
737738

738-
/// This is a guess at how many bytes into the option the payload can be found.
739-
///
740-
/// For niche-optimized types it's correct because it's pigeon-holed to only
741-
/// one possible place. For other types, it's usually correct today, but
742-
/// tweaks to the layout algorithm (particularly expansions of
743-
/// `-Z randomize-layout`) might make it incorrect at any point.
744-
///
745-
/// It's guaranteed to be a multiple of alignment (so will always give a
746-
/// correctly-aligned location) and to be within the allocated object, so
747-
/// is valid to use with `offset` and to use for a zero-sized read.
748-
///
749-
/// FIXME: This is a horrible hack, but allows a nice optimization. It should
750-
/// be replaced with `offset_of!` once that works on enum variants.
751-
const SOME_BYTE_OFFSET_GUESS: isize = {
752-
let some_uninit = Some(mem::MaybeUninit::<T>::uninit());
753-
let payload_ref = some_uninit.as_ref().unwrap();
754-
// SAFETY: `as_ref` gives an address inside the existing `Option`,
755-
// so both pointers are derived from the same thing and the result
756-
// cannot overflow an `isize`.
757-
let offset = unsafe { <*const _>::byte_offset_from(payload_ref, &some_uninit) };
758-
759-
// The offset is into the object, so it's guaranteed to be non-negative.
760-
assert!(offset >= 0);
761-
762-
// The payload and the overall option are aligned,
763-
// so the offset will be a multiple of the alignment too.
764-
assert!((offset as usize) % mem::align_of::<T>() == 0);
765-
766-
let max_offset = mem::size_of::<Self>() - mem::size_of::<T>();
767-
if offset as usize <= max_offset {
768-
// There's enough space after this offset for a `T` to exist without
769-
// overflowing the bounds of the object, so let's try it.
770-
offset
771-
} else {
772-
// The offset guess is definitely wrong, so use the address
773-
// of the original option since we have it already.
774-
// This also correctly handles the case of layout-optimized enums
775-
// where `max_offset == 0` and thus this is the only possibility.
776-
0
777-
}
778-
};
779-
780739
/// Returns a slice of the contained value, if any. If this is `None`, an
781740
/// empty slice is returned. This can be useful to have a single type of
782741
/// iterator over an `Option` or slice.
@@ -809,28 +768,29 @@ impl<T> Option<T> {
809768
#[must_use]
810769
#[unstable(feature = "option_as_slice", issue = "108545")]
811770
pub fn as_slice(&self) -> &[T] {
812-
let payload_ptr: *const T =
813-
// The goal here is that both arms here are calculating exactly
814-
// the same pointer, and thus it'll be folded away when the guessed
815-
// offset is correct, but if the guess is wrong for some reason
816-
// it'll at least still be sound, just no longer optimal.
817-
if let Some(payload) = self {
818-
payload
819-
} else {
820-
let self_ptr: *const Self = self;
821-
// SAFETY: `SOME_BYTE_OFFSET_GUESS` guarantees that its value is
822-
// such that this will be in-bounds of the object.
823-
unsafe { self_ptr.byte_offset(Self::SOME_BYTE_OFFSET_GUESS).cast() }
824-
};
825-
let len = usize::from(self.is_some());
771+
#[cfg(bootstrap)]
772+
match self {
773+
Some(value) => slice::from_ref(value),
774+
None => &[],
775+
}
826776

777+
#[cfg(not(bootstrap))]
827778
// SAFETY: When the `Option` is `Some`, we're using the actual pointer
828779
// to the payload, with a length of 1, so this is equivalent to
829780
// `slice::from_ref`, and thus is safe.
830781
// When the `Option` is `None`, the length used is 0, so to be safe it
831782
// just needs to be aligned, which it is because `&self` is aligned and
832783
// the offset used is a multiple of alignment.
833-
unsafe { slice::from_raw_parts(payload_ptr, len) }
784+
//
785+
// In the new version, the intrinsic always returns a pointer to an
786+
// in-bounds and correctly aligned position for a `T` (even if in the
787+
// `None` case it's just padding).
788+
unsafe {
789+
slice::from_raw_parts(
790+
crate::intrinsics::option_payload_ptr(crate::ptr::from_ref(self)),
791+
usize::from(self.is_some()),
792+
)
793+
}
834794
}
835795

836796
/// Returns a mutable slice of the contained value, if any. If this is
@@ -875,28 +835,32 @@ impl<T> Option<T> {
875835
#[must_use]
876836
#[unstable(feature = "option_as_slice", issue = "108545")]
877837
pub fn as_mut_slice(&mut self) -> &mut [T] {
878-
let payload_ptr: *mut T =
879-
// The goal here is that both arms here are calculating exactly
880-
// the same pointer, and thus it'll be folded away when the guessed
881-
// offset is correct, but if the guess is wrong for some reason
882-
// it'll at least still be sound, just no longer optimal.
883-
if let Some(payload) = self {
884-
payload
885-
} else {
886-
let self_ptr: *mut Self = self;
887-
// SAFETY: `SOME_BYTE_OFFSET_GUESS` guarantees that its value is
888-
// such that this will be in-bounds of the object.
889-
unsafe { self_ptr.byte_offset(Self::SOME_BYTE_OFFSET_GUESS).cast() }
890-
};
891-
let len = usize::from(self.is_some());
838+
#[cfg(bootstrap)]
839+
match self {
840+
Some(value) => slice::from_mut(value),
841+
None => &mut [],
842+
}
892843

844+
#[cfg(not(bootstrap))]
893845
// SAFETY: When the `Option` is `Some`, we're using the actual pointer
894846
// to the payload, with a length of 1, so this is equivalent to
895847
// `slice::from_mut`, and thus is safe.
896848
// When the `Option` is `None`, the length used is 0, so to be safe it
897849
// just needs to be aligned, which it is because `&self` is aligned and
898850
// the offset used is a multiple of alignment.
899-
unsafe { slice::from_raw_parts_mut(payload_ptr, len) }
851+
//
852+
// In the new version, the intrinsic creates a `*const T` from a
853+
// mutable reference so it is safe to cast back to a mutable pointer
854+
// here. As with `as_slice`, the intrinsic always returns a pointer to
855+
// an in-bounds and correctly aligned position for a `T` (even if in
856+
// the `None` case it's just padding).
857+
unsafe {
858+
slice::from_raw_parts_mut(
859+
crate::intrinsics::option_payload_ptr(crate::ptr::from_mut(self).cast_const())
860+
.cast_mut(),
861+
usize::from(self.is_some()),
862+
)
863+
}
900864
}
901865

902866
/////////////////////////////////////////////////////////////////////////
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
- // MIR for `option_payload` before LowerIntrinsics
2+
+ // MIR for `option_payload` after LowerIntrinsics
3+
4+
fn option_payload(_1: &Option<usize>, _2: &Option<String>) -> () {
5+
debug o => _1; // in scope 0 at $DIR/lower_intrinsics.rs:+0:23: +0:24
6+
debug p => _2; // in scope 0 at $DIR/lower_intrinsics.rs:+0:42: +0:43
7+
let mut _0: (); // return place in scope 0 at $DIR/lower_intrinsics.rs:+0:62: +0:62
8+
let mut _4: *const std::option::Option<usize>; // in scope 0 at $DIR/lower_intrinsics.rs:+2:55: +2:56
9+
let mut _6: *const std::option::Option<std::string::String>; // in scope 0 at $DIR/lower_intrinsics.rs:+3:55: +3:56
10+
scope 1 {
11+
let _3: *const usize; // in scope 1 at $DIR/lower_intrinsics.rs:+2:13: +2:15
12+
scope 2 {
13+
debug _x => _3; // in scope 2 at $DIR/lower_intrinsics.rs:+2:13: +2:15
14+
let _5: *const std::string::String; // in scope 2 at $DIR/lower_intrinsics.rs:+3:13: +3:15
15+
scope 3 {
16+
debug _y => _5; // in scope 3 at $DIR/lower_intrinsics.rs:+3:13: +3:15
17+
}
18+
}
19+
}
20+
21+
bb0: {
22+
StorageLive(_3); // scope 1 at $DIR/lower_intrinsics.rs:+2:13: +2:15
23+
StorageLive(_4); // scope 1 at $DIR/lower_intrinsics.rs:+2:55: +2:56
24+
_4 = &raw const (*_1); // scope 1 at $DIR/lower_intrinsics.rs:+2:55: +2:56
25+
- _3 = option_payload_ptr::<usize>(move _4) -> bb1; // scope 1 at $DIR/lower_intrinsics.rs:+2:18: +2:57
26+
- // mir::Constant
27+
- // + span: $DIR/lower_intrinsics.rs:99:18: 99:54
28+
- // + literal: Const { ty: unsafe extern "rust-intrinsic" fn(*const Option<usize>) -> *const usize {option_payload_ptr::<usize>}, val: Value(<ZST>) }
29+
+ _3 = &raw const (((*_4) as Some).0: usize); // scope 1 at $DIR/lower_intrinsics.rs:+2:18: +2:57
30+
+ goto -> bb1; // scope 1 at $DIR/lower_intrinsics.rs:+2:18: +2:57
31+
}
32+
33+
bb1: {
34+
StorageDead(_4); // scope 1 at $DIR/lower_intrinsics.rs:+2:56: +2:57
35+
StorageLive(_5); // scope 2 at $DIR/lower_intrinsics.rs:+3:13: +3:15
36+
StorageLive(_6); // scope 2 at $DIR/lower_intrinsics.rs:+3:55: +3:56
37+
_6 = &raw const (*_2); // scope 2 at $DIR/lower_intrinsics.rs:+3:55: +3:56
38+
- _5 = option_payload_ptr::<String>(move _6) -> bb2; // scope 2 at $DIR/lower_intrinsics.rs:+3:18: +3:57
39+
- // mir::Constant
40+
- // + span: $DIR/lower_intrinsics.rs:100:18: 100:54
41+
- // + literal: Const { ty: unsafe extern "rust-intrinsic" fn(*const Option<String>) -> *const String {option_payload_ptr::<String>}, val: Value(<ZST>) }
42+
+ _5 = &raw const (((*_6) as Some).0: std::string::String); // scope 2 at $DIR/lower_intrinsics.rs:+3:18: +3:57
43+
+ goto -> bb2; // scope 2 at $DIR/lower_intrinsics.rs:+3:18: +3:57
44+
}
45+
46+
bb2: {
47+
StorageDead(_6); // scope 2 at $DIR/lower_intrinsics.rs:+3:56: +3:57
48+
_0 = const (); // scope 1 at $DIR/lower_intrinsics.rs:+1:5: +4:6
49+
StorageDead(_5); // scope 2 at $DIR/lower_intrinsics.rs:+4:5: +4:6
50+
StorageDead(_3); // scope 1 at $DIR/lower_intrinsics.rs:+4:5: +4:6
51+
return; // scope 0 at $DIR/lower_intrinsics.rs:+5:2: +5:2
52+
}
53+
}
54+

‎tests/mir-opt/lower_intrinsics.rs

+9
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,12 @@ pub fn read_via_copy_uninhabited(r: &Never) -> Never {
9191
}
9292

9393
pub enum Never {}
94+
95+
// EMIT_MIR lower_intrinsics.option_payload.LowerIntrinsics.diff
96+
#[cfg(not(bootstrap))]
97+
pub fn option_payload(o: &Option<usize>, p: &Option<String>) {
98+
unsafe {
99+
let _x = core::intrinsics::option_payload_ptr(o);
100+
let _y = core::intrinsics::option_payload_ptr(p);
101+
}
102+
}

0 commit comments

Comments
 (0)
Please sign in to comment.