diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs
index b8e37b604801f..c8b7616e64509 100644
--- a/compiler/rustc_codegen_gcc/src/builder.rs
+++ b/compiler/rustc_codegen_gcc/src/builder.rs
@@ -989,10 +989,14 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
             OperandValue::Ref(place.val)
         } else if place.layout.is_gcc_immediate() {
             let load = self.load(place.layout.gcc_type(self), place.val.llval, place.val.align);
-            if let abi::BackendRepr::Scalar(ref scalar) = place.layout.backend_repr {
-                scalar_load_metadata(self, load, scalar);
-            }
-            OperandValue::Immediate(self.to_immediate(load, place.layout))
+            OperandValue::Immediate(
+                if let abi::BackendRepr::Scalar(ref scalar) = place.layout.backend_repr {
+                    scalar_load_metadata(self, load, scalar);
+                    self.to_immediate_scalar(load, *scalar)
+                } else {
+                    load
+                },
+            )
         } else if let abi::BackendRepr::ScalarPair(ref a, ref b) = place.layout.backend_repr {
             let b_offset = a.size(self).align_to(b.align(self).abi);
 
@@ -1694,7 +1698,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
 
     fn to_immediate_scalar(&mut self, val: Self::Value, scalar: abi::Scalar) -> Self::Value {
         if scalar.is_bool() {
-            return self.trunc(val, self.cx().type_i1());
+            return self.unchecked_utrunc(val, self.cx().type_i1());
         }
         val
     }
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
index a1123fafe2f30..5322b731d8bb5 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -9,7 +9,7 @@ use gccjit::FunctionType;
 use gccjit::{ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp};
 #[cfg(feature = "master")]
 use rustc_abi::ExternAbi;
-use rustc_abi::HasDataLayout;
+use rustc_abi::{BackendRepr, HasDataLayout};
 use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::base::wants_msvc_seh;
 use rustc_codegen_ssa::common::IntPredicate;
@@ -181,14 +181,19 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
             sym::volatile_load | sym::unaligned_volatile_load => {
                 let tp_ty = fn_args.type_at(0);
                 let ptr = args[0].immediate();
+                let layout = self.layout_of(tp_ty);
                 let load = if let PassMode::Cast { cast: ref ty, pad_i32: _ } = fn_abi.ret.mode {
                     let gcc_ty = ty.gcc_type(self);
                     self.volatile_load(gcc_ty, ptr)
                 } else {
-                    self.volatile_load(self.layout_of(tp_ty).gcc_type(self), ptr)
+                    self.volatile_load(layout.gcc_type(self), ptr)
                 };
                 // TODO(antoyo): set alignment.
-                self.to_immediate(load, self.layout_of(tp_ty))
+                if let BackendRepr::Scalar(scalar) = layout.backend_repr {
+                    self.to_immediate_scalar(load, scalar)
+                } else {
+                    load
+                }
             }
             sym::volatile_store => {
                 let dst = args[0].deref(self.cx());
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index e10a4d63407c0..e1609e31c0707 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -29,13 +29,13 @@ use smallvec::SmallVec;
 use tracing::{debug, instrument};
 
 use crate::abi::FnAbiLlvmExt;
-use crate::attributes;
 use crate::common::Funclet;
 use crate::context::{CodegenCx, SimpleCx};
 use crate::llvm::{self, AtomicOrdering, AtomicRmwBinOp, BasicBlock, False, Metadata, True};
 use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
 use crate::value::Value;
+use crate::{attributes, llvm_util};
 
 #[must_use]
 pub(crate) struct GenericBuilder<'a, 'll, CX: Borrow<SimpleCx<'ll>>> {
@@ -606,7 +606,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
     fn to_immediate_scalar(&mut self, val: Self::Value, scalar: abi::Scalar) -> Self::Value {
         if scalar.is_bool() {
-            return self.trunc(val, self.cx().type_i1());
+            return self.unchecked_utrunc(val, self.cx().type_i1());
         }
         val
     }
@@ -746,10 +746,12 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
                 let load = self.load(llty, place.val.llval, place.val.align);
                 if let abi::BackendRepr::Scalar(scalar) = place.layout.backend_repr {
                     scalar_load_metadata(self, load, scalar, place.layout, Size::ZERO);
+                    self.to_immediate_scalar(load, scalar)
+                } else {
+                    load
                 }
-                load
             });
-            OperandValue::Immediate(self.to_immediate(llval, place.layout))
+            OperandValue::Immediate(llval)
         } else if let abi::BackendRepr::ScalarPair(a, b) = place.layout.backend_repr {
             let b_offset = a.size(self).align_to(b.align(self).abi);
 
@@ -942,6 +944,34 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         unsafe { llvm::LLVMBuildTrunc(self.llbuilder, val, dest_ty, UNNAMED) }
     }
 
+    fn unchecked_utrunc(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
+        debug_assert_ne!(self.val_ty(val), dest_ty);
+
+        let trunc = self.trunc(val, dest_ty);
+        if llvm_util::get_version() >= (19, 0, 0) {
+            unsafe {
+                if llvm::LLVMIsAInstruction(trunc).is_some() {
+                    llvm::LLVMSetNUW(trunc, True);
+                }
+            }
+        }
+        trunc
+    }
+
+    fn unchecked_strunc(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
+        debug_assert_ne!(self.val_ty(val), dest_ty);
+
+        let trunc = self.trunc(val, dest_ty);
+        if llvm_util::get_version() >= (19, 0, 0) {
+            unsafe {
+                if llvm::LLVMIsAInstruction(trunc).is_some() {
+                    llvm::LLVMSetNSW(trunc, True);
+                }
+            }
+        }
+        trunc
+    }
+
     fn sext(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMBuildSExt(self.llbuilder, val, dest_ty, UNNAMED) }
     }
diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index e800492dad02f..a989e9c64c0f9 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -24,7 +24,7 @@ use rustc_middle::query::Providers;
 use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, LayoutOf, TyAndLayout};
 use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
 use rustc_session::Session;
-use rustc_session::config::{self, CrateType, EntryFnType, OptLevel, OutputType};
+use rustc_session::config::{self, CrateType, EntryFnType, OutputType};
 use rustc_span::{DUMMY_SP, Symbol, sym};
 use rustc_trait_selection::infer::{BoundRegionConversionTime, TyCtxtInferExt};
 use rustc_trait_selection::traits::{ObligationCause, ObligationCtxt};
@@ -364,13 +364,7 @@ pub(crate) fn build_shift_expr_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     let rhs_sz = bx.cx().int_width(rhs_llty);
     let lhs_sz = bx.cx().int_width(lhs_llty);
     if lhs_sz < rhs_sz {
-        if is_unchecked && bx.sess().opts.optimize != OptLevel::No {
-            // FIXME: Use `trunc nuw` once that's available
-            let inrange = bx.icmp(IntPredicate::IntULE, rhs, mask);
-            bx.assume(inrange);
-        }
-
-        bx.trunc(rhs, lhs_llty)
+        if is_unchecked { bx.unchecked_utrunc(rhs, lhs_llty) } else { bx.trunc(rhs, lhs_llty) }
     } else if lhs_sz > rhs_sz {
         // We zero-extend even if the RHS is signed. So e.g. `(x: i32) << -1i8` will zero-extend the
         // RHS to `255i32`. But then we mask the shift amount to be within the size of the LHS
diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs
index 616d748a2995a..0620f08fc731b 100644
--- a/compiler/rustc_codegen_ssa/src/mir/block.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/block.rs
@@ -1040,7 +1040,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                             let (idx, _) = op.layout.non_1zst_field(bx).expect(
                                 "not exactly one non-1-ZST field in a `DispatchFromDyn` type",
                             );
-                            op = op.extract_field(bx, idx);
+                            op = op.extract_field(self, bx, idx);
                         }
 
                         // Now that we have `*dyn Trait` or `&dyn Trait`, split it up into its
@@ -1072,7 +1072,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                             let (idx, _) = op.layout.non_1zst_field(bx).expect(
                                 "not exactly one non-1-ZST field in a `DispatchFromDyn` type",
                             );
-                            op = op.extract_field(bx, idx);
+                            op = op.extract_field(self, bx, idx);
                         }
 
                         // Make sure that we've actually unwrapped the rcvr down
@@ -1572,9 +1572,9 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                     if scalar.is_bool() {
                         bx.range_metadata(llval, WrappingRange { start: 0, end: 1 });
                     }
+                    // We store bools as `i8` so we need to truncate to `i1`.
+                    llval = bx.to_immediate_scalar(llval, scalar);
                 }
-                // We store bools as `i8` so we need to truncate to `i1`.
-                llval = bx.to_immediate(llval, arg.layout);
             }
         }
 
@@ -1604,7 +1604,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         } else {
             // If the tuple is immediate, the elements are as well.
             for i in 0..tuple.layout.fields.count() {
-                let op = tuple.extract_field(bx, i);
+                let op = tuple.extract_field(self, bx, i);
                 self.codegen_argument(bx, op, llargs, &args[i]);
             }
         }
diff --git a/compiler/rustc_codegen_ssa/src/mir/operand.rs b/compiler/rustc_codegen_ssa/src/mir/operand.rs
index 9ca7d4f8f0047..958a52a2cb1dc 100644
--- a/compiler/rustc_codegen_ssa/src/mir/operand.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/operand.rs
@@ -1,15 +1,14 @@
-use std::assert_matches::assert_matches;
 use std::fmt;
 
 use arrayvec::ArrayVec;
 use either::Either;
 use rustc_abi as abi;
 use rustc_abi::{Align, BackendRepr, Size};
-use rustc_middle::bug;
 use rustc_middle::mir::interpret::{Pointer, Scalar, alloc_range};
 use rustc_middle::mir::{self, ConstValue};
 use rustc_middle::ty::Ty;
 use rustc_middle::ty::layout::{LayoutOf, TyAndLayout};
+use rustc_middle::{bug, span_bug};
 use tracing::debug;
 
 use super::place::{PlaceRef, PlaceValue};
@@ -352,79 +351,83 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
 
     pub(crate) fn extract_field<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
         &self,
+        fx: &mut FunctionCx<'a, 'tcx, Bx>,
         bx: &mut Bx,
         i: usize,
     ) -> Self {
         let field = self.layout.field(bx.cx(), i);
         let offset = self.layout.fields.offset(i);
 
-        let mut val = match (self.val, self.layout.backend_repr) {
-            // If the field is ZST, it has no data.
-            _ if field.is_zst() => OperandValue::ZeroSized,
-
-            // Newtype of a scalar, scalar pair or vector.
-            (OperandValue::Immediate(_) | OperandValue::Pair(..), _)
-                if field.size == self.layout.size =>
-            {
-                assert_eq!(offset.bytes(), 0);
-                self.val
+        let val = if field.is_zst() {
+            OperandValue::ZeroSized
+        } else if field.size == self.layout.size {
+            assert_eq!(offset.bytes(), 0);
+            if let Some(field_val) = fx.codegen_transmute_operand(bx, *self, field) {
+                field_val
+            } else {
+                // we have to go through memory for things like
+                // Newtype vector of array, e.g. #[repr(simd)] struct S([i32; 4]);
+                let place = PlaceRef::alloca(bx, field);
+                self.val.store(bx, place.val.with_type(self.layout));
+                bx.load_operand(place).val
             }
-
-            // Extract a scalar component from a pair.
-            (OperandValue::Pair(a_llval, b_llval), BackendRepr::ScalarPair(a, b)) => {
-                if offset.bytes() == 0 {
-                    assert_eq!(field.size, a.size(bx.cx()));
-                    OperandValue::Immediate(a_llval)
-                } else {
-                    assert_eq!(offset, a.size(bx.cx()).align_to(b.align(bx.cx()).abi));
-                    assert_eq!(field.size, b.size(bx.cx()));
-                    OperandValue::Immediate(b_llval)
+        } else {
+            let (in_scalar, imm) = match (self.val, self.layout.backend_repr) {
+                // Extract a scalar component from a pair.
+                (OperandValue::Pair(a_llval, b_llval), BackendRepr::ScalarPair(a, b)) => {
+                    if offset.bytes() == 0 {
+                        assert_eq!(field.size, a.size(bx.cx()));
+                        (Some(a), a_llval)
+                    } else {
+                        assert_eq!(offset, a.size(bx.cx()).align_to(b.align(bx.cx()).abi));
+                        assert_eq!(field.size, b.size(bx.cx()));
+                        (Some(b), b_llval)
+                    }
                 }
-            }
 
-            // `#[repr(simd)]` types are also immediate.
-            (OperandValue::Immediate(llval), BackendRepr::Vector { .. }) => {
-                OperandValue::Immediate(bx.extract_element(llval, bx.cx().const_usize(i as u64)))
-            }
+                // `#[repr(simd)]` types are also immediate.
+                (OperandValue::Immediate(llval), BackendRepr::Vector { .. }) => {
+                    (None, bx.extract_element(llval, bx.cx().const_usize(i as u64)))
+                }
 
-            _ => bug!("OperandRef::extract_field({:?}): not applicable", self),
+                _ => {
+                    span_bug!(fx.mir.span, "OperandRef::extract_field({:?}): not applicable", self)
+                }
+            };
+            OperandValue::Immediate(match field.backend_repr {
+                BackendRepr::Vector { .. } => imm,
+                BackendRepr::Scalar(out_scalar) => {
+                    let Some(in_scalar) = in_scalar else {
+                        span_bug!(
+                            fx.mir.span,
+                            "OperandRef::extract_field({:?}): missing input scalar for output scalar",
+                            self
+                        )
+                    };
+                    if in_scalar != out_scalar {
+                        // If the backend and backend_immediate types might differ,
+                        // flip back to the backend type then to the new immediate.
+                        // This avoids nop truncations, but still handles things like
+                        // Bools in union fields needs to be truncated.
+                        let backend = bx.from_immediate(imm);
+                        bx.to_immediate_scalar(backend, out_scalar)
+                    } else {
+                        imm
+                    }
+                }
+                BackendRepr::Memory { sized: true } => {
+                    span_bug!(
+                        fx.mir.span,
+                        "Projecting into a simd type with padding doesn't work; \
+                         See <https://github.com/rust-lang/rust/issues/137108>",
+                    );
+                }
+                BackendRepr::Uninhabited
+                | BackendRepr::ScalarPair(_, _)
+                | BackendRepr::Memory { sized: false } => bug!(),
+            })
         };
 
-        match (&mut val, field.backend_repr) {
-            (OperandValue::ZeroSized, _) => {}
-            (
-                OperandValue::Immediate(llval),
-                BackendRepr::Scalar(_) | BackendRepr::ScalarPair(..) | BackendRepr::Vector { .. },
-            ) => {
-                // Bools in union fields needs to be truncated.
-                *llval = bx.to_immediate(*llval, field);
-            }
-            (OperandValue::Pair(a, b), BackendRepr::ScalarPair(a_abi, b_abi)) => {
-                // Bools in union fields needs to be truncated.
-                *a = bx.to_immediate_scalar(*a, a_abi);
-                *b = bx.to_immediate_scalar(*b, b_abi);
-            }
-            // Newtype vector of array, e.g. #[repr(simd)] struct S([i32; 4]);
-            (OperandValue::Immediate(llval), BackendRepr::Memory { sized: true }) => {
-                assert_matches!(self.layout.backend_repr, BackendRepr::Vector { .. });
-
-                let llfield_ty = bx.cx().backend_type(field);
-
-                // Can't bitcast an aggregate, so round trip through memory.
-                let llptr = bx.alloca(field.size, field.align.abi);
-                bx.store(*llval, llptr, field.align.abi);
-                *llval = bx.load(llfield_ty, llptr, field.align.abi);
-            }
-            (
-                OperandValue::Immediate(_),
-                BackendRepr::Uninhabited | BackendRepr::Memory { sized: false },
-            ) => {
-                bug!()
-            }
-            (OperandValue::Pair(..), _) => bug!(),
-            (OperandValue::Ref(..), _) => bug!(),
-        }
-
         OperandRef { val, layout: field }
     }
 }
@@ -587,7 +590,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                                 "Bad PlaceRef: destructing pointers should use cast/PtrMetadata, \
                                  but tried to access field {f:?} of pointer {o:?}",
                             );
-                            o = o.extract_field(bx, f.index());
+                            o = o.extract_field(self, bx, f.index());
                         }
                         mir::ProjectionElem::Index(_)
                         | mir::ProjectionElem::ConstantIndex { .. } => {
diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index 4c5b183cfe92a..daa4fa90ed704 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -231,7 +231,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
     ///
     /// Returns `None` for cases that can't work in that framework, such as for
     /// `Immediate`->`Ref` that needs an `alloc` to get the location.
-    fn codegen_transmute_operand(
+    pub(crate) fn codegen_transmute_operand(
         &mut self,
         bx: &mut Bx,
         operand: OperandRef<'tcx, Bx::Value>,
@@ -260,6 +260,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             OperandValue::Ref(source_place_val) => {
                 assert_eq!(source_place_val.llextra, None);
                 assert_matches!(operand_kind, OperandValueKind::Ref);
+                // The existing alignment is part of `source_place_val`,
+                // so that alignment will be used, not `cast`'s.
                 Some(bx.load_operand(source_place_val.with_type(cast)).val)
             }
             OperandValue::ZeroSized => {
diff --git a/compiler/rustc_codegen_ssa/src/traits/builder.rs b/compiler/rustc_codegen_ssa/src/traits/builder.rs
index 48ae000f2c693..2b00ba01946e5 100644
--- a/compiler/rustc_codegen_ssa/src/traits/builder.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/builder.rs
@@ -1,7 +1,7 @@
 use std::assert_matches::assert_matches;
 use std::ops::Deref;
 
-use rustc_abi::{Align, BackendRepr, Scalar, Size, WrappingRange};
+use rustc_abi::{Align, Scalar, Size, WrappingRange};
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
 use rustc_middle::ty::layout::{FnAbiOf, LayoutOf, TyAndLayout};
 use rustc_middle::ty::{Instance, Ty};
@@ -223,13 +223,6 @@ pub trait BuilderMethods<'a, 'tcx>:
     ) -> (Self::Value, Self::Value);
 
     fn from_immediate(&mut self, val: Self::Value) -> Self::Value;
-    fn to_immediate(&mut self, val: Self::Value, layout: TyAndLayout<'_>) -> Self::Value {
-        if let BackendRepr::Scalar(scalar) = layout.backend_repr {
-            self.to_immediate_scalar(val, scalar)
-        } else {
-            val
-        }
-    }
     fn to_immediate_scalar(&mut self, val: Self::Value, scalar: Scalar) -> Self::Value;
 
     fn alloca(&mut self, size: Size, align: Align) -> Self::Value;
@@ -340,6 +333,17 @@ pub trait BuilderMethods<'a, 'tcx>:
     }
 
     fn trunc(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
+    /// Produces the same value as [`Self::trunc`] (and defaults to that),
+    /// but is UB unless the *zero*-extending the result can reproduce `val`.
+    fn unchecked_utrunc(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value {
+        self.trunc(val, dest_ty)
+    }
+    /// Produces the same value as [`Self::trunc`] (and defaults to that),
+    /// but is UB unless the *sign*-extending the result can reproduce `val`.
+    fn unchecked_strunc(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value {
+        self.trunc(val, dest_ty)
+    }
+
     fn sext(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
     fn fptoui_sat(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
     fn fptosi_sat(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
diff --git a/tests/codegen/intrinsics/transmute-niched.rs b/tests/codegen/intrinsics/transmute-niched.rs
index 5ded0e192fa24..8ff5cc8ee4f4c 100644
--- a/tests/codegen/intrinsics/transmute-niched.rs
+++ b/tests/codegen/intrinsics/transmute-niched.rs
@@ -170,7 +170,7 @@ pub unsafe fn check_bool_from_ordering(x: std::cmp::Ordering) -> bool {
     // OPT: call void @llvm.assume(i1 %2)
     // CHECK-NOT: icmp
     // CHECK-NOT: assume
-    // CHECK: %[[R:.+]] = trunc i8 %x to i1
+    // CHECK: %[[R:.+]] = trunc{{( nuw)?}} i8 %x to i1
     // CHECK: ret i1 %[[R]]
 
     transmute(x)
diff --git a/tests/codegen/intrinsics/transmute.rs b/tests/codegen/intrinsics/transmute.rs
index 5b40a6a12c0fa..ff297b2706578 100644
--- a/tests/codegen/intrinsics/transmute.rs
+++ b/tests/codegen/intrinsics/transmute.rs
@@ -11,6 +11,9 @@ use std::intrinsics::mir::*;
 use std::intrinsics::{transmute, transmute_unchecked};
 use std::mem::MaybeUninit;
 
+// FIXME(LLVM18REMOVED): `trunc nuw` doesn't exist in LLVM 18, so once we no
+// longer support it the optional flag checks can be changed to required.
+
 pub enum ZstNever {}
 
 #[repr(align(2))]
@@ -153,7 +156,7 @@ pub unsafe fn check_from_newtype(x: Scalar64) -> u64 {
 pub unsafe fn check_aggregate_to_bool(x: Aggregate8) -> bool {
     // CHECK: %x = alloca [1 x i8], align 1
     // CHECK: %[[BYTE:.+]] = load i8, ptr %x, align 1
-    // CHECK: %[[BOOL:.+]] = trunc i8 %[[BYTE]] to i1
+    // CHECK: %[[BOOL:.+]] = trunc{{( nuw)?}} i8 %[[BYTE]] to i1
     // CHECK: ret i1 %[[BOOL]]
     transmute(x)
 }
@@ -171,7 +174,7 @@ pub unsafe fn check_aggregate_from_bool(x: bool) -> Aggregate8 {
 #[no_mangle]
 pub unsafe fn check_byte_to_bool(x: u8) -> bool {
     // CHECK-NOT: alloca
-    // CHECK: %[[R:.+]] = trunc i8 %x to i1
+    // CHECK: %[[R:.+]] = trunc{{( nuw)?}} i8 %x to i1
     // CHECK: ret i1 %[[R]]
     transmute(x)
 }
@@ -284,7 +287,7 @@ pub unsafe fn check_long_array_more_aligned(x: [u8; 100]) -> [u32; 25] {
 #[no_mangle]
 pub unsafe fn check_pair_with_bool(x: (u8, bool)) -> (bool, i8) {
     // CHECK-NOT: alloca
-    // CHECK: trunc i8 %x.0 to i1
+    // CHECK: trunc{{( nuw)?}} i8 %x.0 to i1
     // CHECK: zext i1 %x.1 to i8
     transmute(x)
 }
@@ -338,7 +341,7 @@ pub unsafe fn check_heterogeneous_integer_pair(x: (i32, bool)) -> (bool, u32) {
     // CHECK: store i8 %[[WIDER]]
 
     // CHECK: %[[BYTE:.+]] = load i8
-    // CHECK: trunc i8 %[[BYTE:.+]] to i1
+    // CHECK: trunc{{( nuw)?}} i8 %[[BYTE:.+]] to i1
     // CHECK: load i32
     transmute(x)
 }
diff --git a/tests/codegen/simd/project-to-simd-array-field.rs b/tests/codegen/simd/project-to-simd-array-field.rs
new file mode 100644
index 0000000000000..29fab64063363
--- /dev/null
+++ b/tests/codegen/simd/project-to-simd-array-field.rs
@@ -0,0 +1,31 @@
+//@compile-flags: -Copt-level=3
+
+#![crate_type = "lib"]
+#![feature(repr_simd, core_intrinsics)]
+
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy)]
+#[repr(simd)]
+struct i32x4([i32; 4]);
+
+#[inline(always)]
+fn to_array4(a: i32x4) -> [i32; 4] {
+    a.0
+}
+
+// CHECK-LABEL: simd_add_self_then_return_array(
+// CHECK-SAME: ptr{{.+}}sret{{.+}}%[[RET:.+]],
+// CHECK-SAME: ptr{{.+}}%a)
+#[no_mangle]
+pub fn simd_add_self_then_return_array(a: &i32x4) -> [i32; 4] {
+    // It would be nice to just ban `.0` into simd types,
+    // but until we do this has to keep working.
+    // See also <https://github.com/rust-lang/rust/issues/105439>
+
+    // CHECK: %[[T1:.+]] = load <4 x i32>, ptr %a
+    // CHECK: %[[T2:.+]] = shl <4 x i32> %[[T1]], {{splat \(i32 1\)|<i32 1, i32 1, i32 1, i32 1>}}
+    // CHECK: store <4 x i32> %[[T2]], ptr %[[RET]]
+    let a = *a;
+    let b = unsafe { core::intrinsics::simd::simd_add(a, a) };
+    to_array4(b)
+}
diff --git a/tests/codegen/transmute-scalar.rs b/tests/codegen/transmute-scalar.rs
index 43da7c1781ef3..c080259a9172e 100644
--- a/tests/codegen/transmute-scalar.rs
+++ b/tests/codegen/transmute-scalar.rs
@@ -26,7 +26,7 @@ pub fn bool_to_byte(b: bool) -> u8 {
 }
 
 // CHECK-LABEL: define{{.*}}zeroext i1 @byte_to_bool(i8{{.*}} %byte)
-// CHECK: %_0 = trunc i8 %byte to i1
+// CHECK: %_0 = trunc{{( nuw)?}} i8 %byte to i1
 // CHECK-NEXT: ret i1 %_0
 #[no_mangle]
 pub unsafe fn byte_to_bool(byte: u8) -> bool {
diff --git a/tests/codegen/unchecked_shifts.rs b/tests/codegen/unchecked_shifts.rs
index b27eb73c0cc94..9fccaf2252e01 100644
--- a/tests/codegen/unchecked_shifts.rs
+++ b/tests/codegen/unchecked_shifts.rs
@@ -1,4 +1,10 @@
-//@ compile-flags: -Copt-level=3
+//@ revisions: LLVM18 LLVM19PLUS
+//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes
+//@[LLVM18] exact-llvm-major-version: 18
+//@[LLVM19PLUS] min-llvm-version: 19
+
+// This runs mir-opts to inline the standard library call, but doesn't run LLVM
+// optimizations so it doesn't need to worry about them adding more flags.
 
 #![crate_type = "lib"]
 #![feature(unchecked_shifts)]
@@ -17,13 +23,10 @@ pub unsafe fn unchecked_shl_unsigned_same(a: u32, b: u32) -> u32 {
 // CHECK-LABEL: @unchecked_shl_unsigned_smaller
 #[no_mangle]
 pub unsafe fn unchecked_shl_unsigned_smaller(a: u16, b: u32) -> u16 {
-    // This uses -DAG to avoid failing on irrelevant reorderings,
-    // like emitting the truncation earlier.
-
-    // CHECK-DAG: %[[INRANGE:.+]] = icmp ult i32 %b, 16
-    // CHECK-DAG: tail call void @llvm.assume(i1 %[[INRANGE]])
-    // CHECK-DAG: %[[TRUNC:.+]] = trunc{{( nuw)?( nsw)?}} i32 %b to i16
-    // CHECK-DAG: shl i16 %a, %[[TRUNC]]
+    // CHECK-NOT: assume
+    // LLVM18: %[[TRUNC:.+]] = trunc i32 %b to i16
+    // LLVM19PLUS: %[[TRUNC:.+]] = trunc nuw i32 %b to i16
+    // CHECK: shl i16 %a, %[[TRUNC]]
     a.unchecked_shl(b)
 }
 
@@ -31,7 +34,7 @@ pub unsafe fn unchecked_shl_unsigned_smaller(a: u16, b: u32) -> u16 {
 #[no_mangle]
 pub unsafe fn unchecked_shl_unsigned_bigger(a: u64, b: u32) -> u64 {
     // CHECK-NOT: assume
-    // CHECK: %[[EXT:.+]] = zext{{( nneg)?}} i32 %b to i64
+    // CHECK: %[[EXT:.+]] = zext i32 %b to i64
     // CHECK: shl i64 %a, %[[EXT]]
     a.unchecked_shl(b)
 }
@@ -49,13 +52,10 @@ pub unsafe fn unchecked_shr_signed_same(a: i32, b: u32) -> i32 {
 // CHECK-LABEL: @unchecked_shr_signed_smaller
 #[no_mangle]
 pub unsafe fn unchecked_shr_signed_smaller(a: i16, b: u32) -> i16 {
-    // This uses -DAG to avoid failing on irrelevant reorderings,
-    // like emitting the truncation earlier.
-
-    // CHECK-DAG: %[[INRANGE:.+]] = icmp ult i32 %b, 16
-    // CHECK-DAG: tail call void @llvm.assume(i1 %[[INRANGE]])
-    // CHECK-DAG: %[[TRUNC:.+]] = trunc{{( nuw)?( nsw)?}}  i32 %b to i16
-    // CHECK-DAG: ashr i16 %a, %[[TRUNC]]
+    // CHECK-NOT: assume
+    // LLVM18: %[[TRUNC:.+]] = trunc i32 %b to i16
+    // LLVM19PLUS: %[[TRUNC:.+]] = trunc nuw i32 %b to i16
+    // CHECK: ashr i16 %a, %[[TRUNC]]
     a.unchecked_shr(b)
 }
 
@@ -63,7 +63,7 @@ pub unsafe fn unchecked_shr_signed_smaller(a: i16, b: u32) -> i16 {
 #[no_mangle]
 pub unsafe fn unchecked_shr_signed_bigger(a: i64, b: u32) -> i64 {
     // CHECK-NOT: assume
-    // CHECK: %[[EXT:.+]] = zext{{( nneg)?}} i32 %b to i64
+    // CHECK: %[[EXT:.+]] = zext i32 %b to i64
     // CHECK: ashr i64 %a, %[[EXT]]
     a.unchecked_shr(b)
 }
@@ -72,7 +72,7 @@ pub unsafe fn unchecked_shr_signed_bigger(a: i64, b: u32) -> i64 {
 #[no_mangle]
 pub unsafe fn unchecked_shr_u128_i8(a: u128, b: i8) -> u128 {
     // CHECK-NOT: assume
-    // CHECK: %[[EXT:.+]] = zext{{( nneg)?}} i8 %b to i128
+    // CHECK: %[[EXT:.+]] = zext i8 %b to i128
     // CHECK: lshr i128 %a, %[[EXT]]
     std::intrinsics::unchecked_shr(a, b)
 }
@@ -81,7 +81,7 @@ pub unsafe fn unchecked_shr_u128_i8(a: u128, b: i8) -> u128 {
 #[no_mangle]
 pub unsafe fn unchecked_shl_i128_u8(a: i128, b: u8) -> i128 {
     // CHECK-NOT: assume
-    // CHECK: %[[EXT:.+]] = zext{{( nneg)?}} i8 %b to i128
+    // CHECK: %[[EXT:.+]] = zext i8 %b to i128
     // CHECK: shl i128 %a, %[[EXT]]
     std::intrinsics::unchecked_shl(a, b)
 }
@@ -89,25 +89,19 @@ pub unsafe fn unchecked_shl_i128_u8(a: i128, b: u8) -> i128 {
 // CHECK-LABEL: @unchecked_shl_u8_i128
 #[no_mangle]
 pub unsafe fn unchecked_shl_u8_i128(a: u8, b: i128) -> u8 {
-    // This uses -DAG to avoid failing on irrelevant reorderings,
-    // like emitting the truncation earlier.
-
-    // CHECK-DAG: %[[INRANGE:.+]] = icmp ult i128 %b, 8
-    // CHECK-DAG: tail call void @llvm.assume(i1 %[[INRANGE]])
-    // CHECK-DAG: %[[TRUNC:.+]] = trunc{{( nuw)?( nsw)?}} i128 %b to i8
-    // CHECK-DAG: shl i8 %a, %[[TRUNC]]
+    // CHECK-NOT: assume
+    // LLVM18: %[[TRUNC:.+]] = trunc i128 %b to i8
+    // LLVM19PLUS: %[[TRUNC:.+]] = trunc nuw i128 %b to i8
+    // CHECK: shl i8 %a, %[[TRUNC]]
     std::intrinsics::unchecked_shl(a, b)
 }
 
 // CHECK-LABEL: @unchecked_shr_i8_u128
 #[no_mangle]
 pub unsafe fn unchecked_shr_i8_u128(a: i8, b: u128) -> i8 {
-    // This uses -DAG to avoid failing on irrelevant reorderings,
-    // like emitting the truncation earlier.
-
-    // CHECK-DAG: %[[INRANGE:.+]] = icmp ult i128 %b, 8
-    // CHECK-DAG: tail call void @llvm.assume(i1 %[[INRANGE]])
-    // CHECK-DAG: %[[TRUNC:.+]] = trunc{{( nuw)?( nsw)?}} i128 %b to i8
-    // CHECK-DAG: ashr i8 %a, %[[TRUNC]]
+    // CHECK-NOT: assume
+    // LLVM18: %[[TRUNC:.+]] = trunc i128 %b to i8
+    // LLVM19PLUS: %[[TRUNC:.+]] = trunc nuw i128 %b to i8
+    // CHECK: ashr i8 %a, %[[TRUNC]]
     std::intrinsics::unchecked_shr(a, b)
 }
diff --git a/tests/codegen/union-abi.rs b/tests/codegen/union-abi.rs
index 16022fad8af7b..28acc4de2f327 100644
--- a/tests/codegen/union-abi.rs
+++ b/tests/codegen/union-abi.rs
@@ -142,4 +142,4 @@ pub union UnionBool {
 pub fn test_UnionBool(b: UnionBool) -> bool {
     unsafe { b.b }
 }
-// CHECK: %_0 = trunc i8 %b to i1
+// CHECK: %_0 = trunc{{( nuw)?}} i8 %b to i1
diff --git a/tests/crashes/project-to-simd-array-field.rs b/tests/crashes/project-to-simd-array-field.rs
new file mode 100644
index 0000000000000..6dc916c41dbd5
--- /dev/null
+++ b/tests/crashes/project-to-simd-array-field.rs
@@ -0,0 +1,33 @@
+//@ known-bug: #137108
+//@compile-flags: -Copt-level=3
+
+// If you fix this, put it in the corresponding codegen test,
+// not in a UI test like the readme says.
+
+#![crate_type = "lib"]
+
+#![feature(repr_simd, core_intrinsics)]
+
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy)]
+#[repr(simd)]
+struct i32x3([i32; 3]);
+
+const _: () = { assert!(size_of::<i32x3>() == 16) };
+
+#[inline(always)]
+fn to_array3(a: i32x3) -> [i32; 3] {
+    a.0
+}
+
+// CHECK-LABEL: simd_add_self_then_return_array_packed(
+// CHECK-SAME: ptr{{.+}}sret{{.+}}%[[RET:.+]],
+// CHECK-SAME: ptr{{.+}}%a)
+#[no_mangle]
+pub fn simd_add_self_then_return_array_packed(a: i32x3) -> [i32; 3] {
+    // CHECK: %[[T1:.+]] = load <3 x i32>, ptr %a
+    // CHECK: %[[T2:.+]] = shl <3 x i32> %[[T1]], <i32 1, i32 1, i32 1>
+    // CHECK: store <3 x i32> %[[T2]], ptr %[[RET]]
+    let b = unsafe { core::intrinsics::simd::simd_add(a, a) };
+    to_array3(b)
+}