Skip to content

Commit

Permalink
Auto merge of #129392 - compiler-errors:raw-ref-op-doesnt-diverge-but…
Browse files Browse the repository at this point in the history
…-more, r=lcnr

Do not consider match/let/ref of place that evaluates to `!` to diverge, disallow coercions from them too

Fixes #117288.

This PR implements a heuristic which disables two things that are currently being performed on the HIR when we have **expressions that involve place-like expressions that point to `!`**. Specifically, it will (in certain cases explained below):

### (1.) Disable the `NeverToAny` coercion we implicitly insert for `!`.

Which fixes this inadvertent, sneaky unsoundness:

```
unsafe {
    let x: *const ! = &0 as *const u8 as *const !;
    let _: () = *x;
}
```

which is UB because currently rust emits an *implicit* NeverToAny coercion even though we really shouldn't be, since there's no read of the value pointed by `x`.

### (2.) Disable the logic which considers expression which evaluate to `!` to diverge, which affects the type returned by the containing block.

Which fixes this unsoundness:

```
fn make_up_a_value<T>() -> T {
    unsafe {
        let x: *const ! = &0 as *const u8 as *const !;
        let _ = *x;
    }
}
```

We disable these two operations **if** the expression is a place-like expression (locals, statics, field projections, index operations, and deref operations), and if the parent expression is either:
(1.) the LHS of an assignment
(2.) AddrOf
(3.) A match or let **unless** all of the *patterns consitute a read*, which is explained below:

And finally, a pattern currently is considered to constitute a read **unless** it is a wildcard, or an OR pattern. An OR pattern is considered to constitute a read if all of its subpatterns constitute a read, to remain as conservative as possible in cases like `_ | subpat` or `subpat | _`.

All other patterns are considered currently to constitute a read. Specifically, because `NeverToAny` is a coercion performed on a *value* and not a *place*, `Struct { .. }` on a `!` type must be a coercion currently, and we currently rely on this behavior to allow us to perform coercions like `let _: i32 = x;` where `x: !`.

This is already considered UB by [miri](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=daf3a2246433fe43fdc07d1389c276c9), but also means it does not affect the preexisting UB in this case:

```
let Struct { .. } = *never_ptr;
```

Even though it's likely up for debate since we're not actually reading any data out of the struct, it almost certainly causes inference changes which I do *NOT* want to fix in this PR.
  • Loading branch information
bors committed Oct 6, 2024
2 parents 68301a6 + e8d5eb2 commit c2c0e52
Show file tree
Hide file tree
Showing 15 changed files with 634 additions and 32 deletions.
46 changes: 35 additions & 11 deletions compiler/rustc_hir_typeck/src/coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ struct Coerce<'a, 'tcx> {
/// See #47489 and #48598
/// See docs on the "AllowTwoPhase" type for a more detailed discussion
allow_two_phase: AllowTwoPhase,
/// Whether we allow `NeverToAny` coercions. This is unsound if we're
/// coercing a place expression without it counting as a read in the MIR.
/// This is a side-effect of HIR not really having a great distinction
/// between places and values.
coerce_never: bool,
}

impl<'a, 'tcx> Deref for Coerce<'a, 'tcx> {
Expand Down Expand Up @@ -125,8 +130,9 @@ impl<'f, 'tcx> Coerce<'f, 'tcx> {
fcx: &'f FnCtxt<'f, 'tcx>,
cause: ObligationCause<'tcx>,
allow_two_phase: AllowTwoPhase,
coerce_never: bool,
) -> Self {
Coerce { fcx, cause, allow_two_phase, use_lub: false }
Coerce { fcx, cause, allow_two_phase, use_lub: false, coerce_never }
}

fn unify(&self, a: Ty<'tcx>, b: Ty<'tcx>) -> InferResult<'tcx, Ty<'tcx>> {
Expand Down Expand Up @@ -177,7 +183,12 @@ impl<'f, 'tcx> Coerce<'f, 'tcx> {

// Coercing from `!` to any type is allowed:
if a.is_never() {
return success(simple(Adjust::NeverToAny)(b), b, vec![]);
if self.coerce_never {
return success(simple(Adjust::NeverToAny)(b), b, vec![]);
} else {
// Otherwise the only coercion we can do is unification.
return self.unify_and(a, b, identity);
}
}

// Coercing *from* an unresolved inference variable means that
Expand Down Expand Up @@ -1038,7 +1049,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
/// The expressions *must not* have any preexisting adjustments.
pub(crate) fn coerce(
&self,
expr: &hir::Expr<'_>,
expr: &'tcx hir::Expr<'tcx>,
expr_ty: Ty<'tcx>,
mut target: Ty<'tcx>,
allow_two_phase: AllowTwoPhase,
Expand All @@ -1055,7 +1066,12 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {

let cause =
cause.unwrap_or_else(|| self.cause(expr.span, ObligationCauseCode::ExprAssignable));
let coerce = Coerce::new(self, cause, allow_two_phase);
let coerce = Coerce::new(
self,
cause,
allow_two_phase,
self.expr_guaranteed_to_constitute_read_for_never(expr),
);
let ok = self.commit_if_ok(|_| coerce.coerce(source, target))?;

let (adjustments, _) = self.register_infer_ok_obligations(ok);
Expand All @@ -1077,8 +1093,9 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
debug!("coercion::can_with_predicates({:?} -> {:?})", source, target);

let cause = self.cause(DUMMY_SP, ObligationCauseCode::ExprAssignable);
// We don't ever need two-phase here since we throw out the result of the coercion
let coerce = Coerce::new(self, cause, AllowTwoPhase::No);
// We don't ever need two-phase here since we throw out the result of the coercion.
// We also just always set `coerce_never` to true, since this is a heuristic.
let coerce = Coerce::new(self, cause, AllowTwoPhase::No, true);
self.probe(|_| {
let Ok(ok) = coerce.coerce(source, target) else {
return false;
Expand All @@ -1090,12 +1107,16 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
}

/// Given a type and a target type, this function will calculate and return
/// how many dereference steps needed to achieve `expr_ty <: target`. If
/// how many dereference steps needed to coerce `expr_ty` to `target`. If
/// it's not possible, return `None`.
pub(crate) fn deref_steps(&self, expr_ty: Ty<'tcx>, target: Ty<'tcx>) -> Option<usize> {
pub(crate) fn deref_steps_for_suggestion(
&self,
expr_ty: Ty<'tcx>,
target: Ty<'tcx>,
) -> Option<usize> {
let cause = self.cause(DUMMY_SP, ObligationCauseCode::ExprAssignable);
// We don't ever need two-phase here since we throw out the result of the coercion
let coerce = Coerce::new(self, cause, AllowTwoPhase::No);
// We don't ever need two-phase here since we throw out the result of the coercion.
let coerce = Coerce::new(self, cause, AllowTwoPhase::No, true);
coerce
.autoderef(DUMMY_SP, expr_ty)
.find_map(|(ty, steps)| self.probe(|_| coerce.unify(ty, target)).ok().map(|_| steps))
Expand Down Expand Up @@ -1252,7 +1273,10 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
// probably aren't processing function arguments here and even if we were,
// they're going to get autorefed again anyway and we can apply 2-phase borrows
// at that time.
let mut coerce = Coerce::new(self, cause.clone(), AllowTwoPhase::No);
//
// NOTE: we set `coerce_never` to `true` here because coercion LUBs only
// operate on values and not places, so a never coercion is valid.
let mut coerce = Coerce::new(self, cause.clone(), AllowTwoPhase::No, true);
coerce.use_lub = true;

// First try to coerce the new expression to the type of the previous ones,
Expand Down
191 changes: 188 additions & 3 deletions compiler/rustc_hir_typeck/src/expr.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// ignore-tidy-filelength
// FIXME: we should move the field error reporting code somewhere else.

//! Type checking expressions.
//!
//! See [`rustc_hir_analysis::check`] for more context on type checking in general.
Expand Down Expand Up @@ -62,7 +65,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {

// While we don't allow *arbitrary* coercions here, we *do* allow
// coercions from ! to `expected`.
if ty.is_never() {
if ty.is_never() && self.expr_guaranteed_to_constitute_read_for_never(expr) {
if let Some(_) = self.typeck_results.borrow().adjustments().get(expr.hir_id) {
let reported = self.dcx().span_delayed_bug(
expr.span,
Expand Down Expand Up @@ -238,8 +241,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
_ => self.warn_if_unreachable(expr.hir_id, expr.span, "expression"),
}

// Any expression that produces a value of type `!` must have diverged
if ty.is_never() {
// Any expression that produces a value of type `!` must have diverged,
// unless it's a place expression that isn't being read from, in which case
// diverging would be unsound since we may never actually read the `!`.
// e.g. `let _ = *never_ptr;` with `never_ptr: *const !`.
if ty.is_never() && self.expr_guaranteed_to_constitute_read_for_never(expr) {
self.diverges.set(self.diverges.get() | Diverges::always(expr.span));
}

Expand All @@ -257,6 +263,185 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
ty
}

/// Whether this expression constitutes a read of value of the type that
/// it evaluates to.
///
/// This is used to determine if we should consider the block to diverge
/// if the expression evaluates to `!`, and if we should insert a `NeverToAny`
/// coercion for values of type `!`.
///
/// This function generally returns `false` if the expression is a place
/// expression and the *parent* expression is the scrutinee of a match or
/// the pointee of an `&` addr-of expression, since both of those parent
/// expressions take a *place* and not a value.
pub(super) fn expr_guaranteed_to_constitute_read_for_never(
&self,
expr: &'tcx hir::Expr<'tcx>,
) -> bool {
// We only care about place exprs. Anything else returns an immediate
// which would constitute a read. We don't care about distinguishing
// "syntactic" place exprs since if the base of a field projection is
// not a place then it would've been UB to read from it anyways since
// that constitutes a read.
if !expr.is_syntactic_place_expr() {
return true;
}

let parent_node = self.tcx.parent_hir_node(expr.hir_id);
match parent_node {
hir::Node::Expr(parent_expr) => {
match parent_expr.kind {
// Addr-of, field projections, and LHS of assignment don't constitute reads.
// Assignment does call `drop_in_place`, though, but its safety
// requirements are not the same.
ExprKind::AddrOf(..) | hir::ExprKind::Field(..) => false,
ExprKind::Assign(lhs, _, _) => {
// Only the LHS does not constitute a read
expr.hir_id != lhs.hir_id
}

// See note on `PatKind::Or` below for why this is `all`.
ExprKind::Match(scrutinee, arms, _) => {
assert_eq!(scrutinee.hir_id, expr.hir_id);
arms.iter()
.all(|arm| self.pat_guaranteed_to_constitute_read_for_never(arm.pat))
}
ExprKind::Let(hir::LetExpr { init, pat, .. }) => {
assert_eq!(init.hir_id, expr.hir_id);
self.pat_guaranteed_to_constitute_read_for_never(*pat)
}

// Any expression child of these expressions constitute reads.
ExprKind::Array(_)
| ExprKind::Call(_, _)
| ExprKind::MethodCall(_, _, _, _)
| ExprKind::Tup(_)
| ExprKind::Binary(_, _, _)
| ExprKind::Unary(_, _)
| ExprKind::Cast(_, _)
| ExprKind::Type(_, _)
| ExprKind::DropTemps(_)
| ExprKind::If(_, _, _)
| ExprKind::Closure(_)
| ExprKind::Block(_, _)
| ExprKind::AssignOp(_, _, _)
| ExprKind::Index(_, _, _)
| ExprKind::Break(_, _)
| ExprKind::Ret(_)
| ExprKind::Become(_)
| ExprKind::InlineAsm(_)
| ExprKind::Struct(_, _, _)
| ExprKind::Repeat(_, _)
| ExprKind::Yield(_, _) => true,

// These expressions have no (direct) sub-exprs.
ExprKind::ConstBlock(_)
| ExprKind::Loop(_, _, _, _)
| ExprKind::Lit(_)
| ExprKind::Path(_)
| ExprKind::Continue(_)
| ExprKind::OffsetOf(_, _)
| ExprKind::Err(_) => unreachable!("no sub-expr expected for {:?}", expr.kind),
}
}

// If we have a subpattern that performs a read, we want to consider this
// to diverge for compatibility to support something like `let x: () = *never_ptr;`.
hir::Node::LetStmt(hir::LetStmt { init: Some(target), pat, .. }) => {
assert_eq!(target.hir_id, expr.hir_id);
self.pat_guaranteed_to_constitute_read_for_never(*pat)
}

// These nodes (if they have a sub-expr) do constitute a read.
hir::Node::Block(_)
| hir::Node::Arm(_)
| hir::Node::ExprField(_)
| hir::Node::AnonConst(_)
| hir::Node::ConstBlock(_)
| hir::Node::ConstArg(_)
| hir::Node::Stmt(_)
| hir::Node::Item(hir::Item {
kind: hir::ItemKind::Const(..) | hir::ItemKind::Static(..),
..
})
| hir::Node::TraitItem(hir::TraitItem {
kind: hir::TraitItemKind::Const(..), ..
})
| hir::Node::ImplItem(hir::ImplItem { kind: hir::ImplItemKind::Const(..), .. }) => true,

// These nodes do not have direct sub-exprs.
hir::Node::Param(_)
| hir::Node::Item(_)
| hir::Node::ForeignItem(_)
| hir::Node::TraitItem(_)
| hir::Node::ImplItem(_)
| hir::Node::Variant(_)
| hir::Node::Field(_)
| hir::Node::PathSegment(_)
| hir::Node::Ty(_)
| hir::Node::AssocItemConstraint(_)
| hir::Node::TraitRef(_)
| hir::Node::Pat(_)
| hir::Node::PatField(_)
| hir::Node::LetStmt(_)
| hir::Node::Synthetic
| hir::Node::Err(_)
| hir::Node::Ctor(_)
| hir::Node::Lifetime(_)
| hir::Node::GenericParam(_)
| hir::Node::Crate(_)
| hir::Node::Infer(_)
| hir::Node::WhereBoundPredicate(_)
| hir::Node::ArrayLenInfer(_)
| hir::Node::PreciseCapturingNonLifetimeArg(_)
| hir::Node::OpaqueTy(_) => {
unreachable!("no sub-expr expected for {parent_node:?}")
}
}
}

/// Whether this pattern constitutes a read of value of the scrutinee that
/// it is matching against. This is used to determine whether we should
/// perform `NeverToAny` coercions.
///
/// See above for the nuances of what happens when this returns true.
pub(super) fn pat_guaranteed_to_constitute_read_for_never(&self, pat: &hir::Pat<'_>) -> bool {
match pat.kind {
// Does not constitute a read.
hir::PatKind::Wild => false,

// This is unnecessarily restrictive when the pattern that doesn't
// constitute a read is unreachable.
//
// For example `match *never_ptr { value => {}, _ => {} }` or
// `match *never_ptr { _ if false => {}, value => {} }`.
//
// It is however fine to be restrictive here; only returning `true`
// can lead to unsoundness.
hir::PatKind::Or(subpats) => {
subpats.iter().all(|pat| self.pat_guaranteed_to_constitute_read_for_never(pat))
}

// Does constitute a read, since it is equivalent to a discriminant read.
hir::PatKind::Never => true,

// All of these constitute a read, or match on something that isn't `!`,
// which would require a `NeverToAny` coercion.
hir::PatKind::Binding(_, _, _, _)
| hir::PatKind::Struct(_, _, _)
| hir::PatKind::TupleStruct(_, _, _)
| hir::PatKind::Path(_)
| hir::PatKind::Tuple(_, _)
| hir::PatKind::Box(_)
| hir::PatKind::Ref(_, _)
| hir::PatKind::Deref(_)
| hir::PatKind::Lit(_)
| hir::PatKind::Range(_, _, _)
| hir::PatKind::Slice(_, _, _)
| hir::PatKind::Err(_) => true,
}
}

#[instrument(skip(self, expr), level = "debug")]
fn check_expr_kind(
&self,
Expand Down
6 changes: 3 additions & 3 deletions compiler/rustc_hir_typeck/src/fn_ctxt/suggestions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2608,7 +2608,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
}

if let hir::ExprKind::Unary(hir::UnOp::Deref, inner) = expr.kind
&& let Some(1) = self.deref_steps(expected, checked_ty)
&& let Some(1) = self.deref_steps_for_suggestion(expected, checked_ty)
{
// We have `*&T`, check if what was expected was `&T`.
// If so, we may want to suggest removing a `*`.
Expand Down Expand Up @@ -2738,7 +2738,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
}
}
(_, &ty::RawPtr(ty_b, mutbl_b), &ty::Ref(_, ty_a, mutbl_a)) => {
if let Some(steps) = self.deref_steps(ty_a, ty_b)
if let Some(steps) = self.deref_steps_for_suggestion(ty_a, ty_b)
// Only suggest valid if dereferencing needed.
&& steps > 0
// The pointer type implements `Copy` trait so the suggestion is always valid.
Expand Down Expand Up @@ -2782,7 +2782,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
}
}
_ if sp == expr.span => {
if let Some(mut steps) = self.deref_steps(checked_ty, expected) {
if let Some(mut steps) = self.deref_steps_for_suggestion(checked_ty, expected) {
let mut expr = expr.peel_blocks();
let mut prefix_span = expr.span.shrink_to_lo();
let mut remove = String::new();
Expand Down
Loading

0 comments on commit c2c0e52

Please sign in to comment.