diff --git a/compiler/rustc_codegen_gcc/src/asm.rs b/compiler/rustc_codegen_gcc/src/asm.rs
index 2af050f0c7533..2e8cd934eb298 100644
--- a/compiler/rustc_codegen_gcc/src/asm.rs
+++ b/compiler/rustc_codegen_gcc/src/asm.rs
@@ -589,6 +589,7 @@ fn reg_to_gcc(reg: InlineAsmRegOrRegClass) -> ConstraintOrRegister {
             | InlineAsmRegClass::X86(X86InlineAsmRegClass::ymm_reg) => "x",
             InlineAsmRegClass::X86(X86InlineAsmRegClass::zmm_reg) => "v",
             InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => unimplemented!(),
+            InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg0) => unimplemented!(),
             InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => unimplemented!(),
             InlineAsmRegClass::X86(
                 X86InlineAsmRegClass::x87_reg | X86InlineAsmRegClass::mmx_reg,
@@ -654,6 +655,7 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl
         | InlineAsmRegClass::X86(X86InlineAsmRegClass::zmm_reg) => cx.type_f32(),
         InlineAsmRegClass::X86(X86InlineAsmRegClass::x87_reg) => unimplemented!(),
         InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => cx.type_i16(),
+        InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg0) => cx.type_i16(),
         InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => cx.type_i32(),
         InlineAsmRegClass::SpirV(SpirVInlineAsmRegClass::reg) => {
             bug!("LLVM backend does not support SPIR-V")
@@ -784,6 +786,7 @@ fn modifier_to_gcc(arch: InlineAsmArch, reg: InlineAsmRegClass, modifier: Option
             _ => unreachable!(),
         },
         InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => None,
+        InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg0) => None,
         InlineAsmRegClass::X86(X86InlineAsmRegClass::x87_reg | X86InlineAsmRegClass::mmx_reg) => {
             unreachable!("clobber-only")
         }
diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index dff3200791825..e994001f96fd9 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -602,7 +602,9 @@ fn reg_to_llvm(reg: InlineAsmRegOrRegClass, layout: Option<&TyAndLayout<'_>>) ->
             InlineAsmRegClass::X86(X86InlineAsmRegClass::zmm_reg) => "v",
             InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => "^Yk",
             InlineAsmRegClass::X86(
-                X86InlineAsmRegClass::x87_reg | X86InlineAsmRegClass::mmx_reg,
+                X86InlineAsmRegClass::x87_reg
+                | X86InlineAsmRegClass::mmx_reg
+                | X86InlineAsmRegClass::kreg0,
             ) => unreachable!("clobber-only"),
             InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => "r",
             InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::reg) => "r",
@@ -687,7 +689,11 @@ fn modifier_to_llvm(
             _ => unreachable!(),
         },
         InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => None,
-        InlineAsmRegClass::X86(X86InlineAsmRegClass::x87_reg | X86InlineAsmRegClass::mmx_reg) => {
+        InlineAsmRegClass::X86(
+            X86InlineAsmRegClass::x87_reg
+            | X86InlineAsmRegClass::mmx_reg
+            | X86InlineAsmRegClass::kreg0,
+        ) => {
             unreachable!("clobber-only")
         }
         InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => None,
@@ -757,7 +763,11 @@ fn dummy_output_type<'ll>(cx: &CodegenCx<'ll, '_>, reg: InlineAsmRegClass) -> &'
         | InlineAsmRegClass::X86(X86InlineAsmRegClass::ymm_reg)
         | InlineAsmRegClass::X86(X86InlineAsmRegClass::zmm_reg) => cx.type_f32(),
         InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => cx.type_i16(),
-        InlineAsmRegClass::X86(X86InlineAsmRegClass::x87_reg | X86InlineAsmRegClass::mmx_reg) => {
+        InlineAsmRegClass::X86(
+            X86InlineAsmRegClass::x87_reg
+            | X86InlineAsmRegClass::mmx_reg
+            | X86InlineAsmRegClass::kreg0,
+        ) => {
             unreachable!("clobber-only")
         }
         InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => cx.type_i32(),
diff --git a/compiler/rustc_const_eval/src/const_eval/mod.rs b/compiler/rustc_const_eval/src/const_eval/mod.rs
index 80270f825630f..68f9bee593f65 100644
--- a/compiler/rustc_const_eval/src/const_eval/mod.rs
+++ b/compiler/rustc_const_eval/src/const_eval/mod.rs
@@ -38,7 +38,7 @@ pub(crate) fn const_caller_location(
     if intern_const_alloc_recursive(&mut ecx, InternKind::Constant, &loc_place).is_err() {
         bug!("intern_const_alloc_recursive should not error in this case")
     }
-    ConstValue::Scalar(Scalar::from_pointer(loc_place.ptr.into_pointer_or_addr().unwrap(), &tcx))
+    ConstValue::Scalar(Scalar::from_maybe_pointer(loc_place.ptr, &tcx))
 }
 
 /// Convert an evaluated constant to a type level constant
diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs
index ddfbcbdd22e16..7721485771b3b 100644
--- a/compiler/rustc_const_eval/src/interpret/machine.rs
+++ b/compiler/rustc_const_eval/src/interpret/machine.rs
@@ -88,6 +88,10 @@ pub trait Machine<'mir, 'tcx>: Sized {
     /// Pointers are "tagged" with provenance information; typically the `AllocId` they belong to.
     type PointerTag: Provenance + Eq + Hash + 'static;
 
+    /// Extra data obtained from a pointer's tag when its `AllocId` is retrieved,
+    /// and passed on to the memory access hooks so they can act on it.
+    type TagExtra: Copy + 'static;
+
     /// Machines can define extra (non-instance) things that represent values of function pointers.
     /// For example, Miri uses this to return a function pointer from `dlsym`
     /// that can later be called to execute the right thing.
@@ -122,6 +126,8 @@ pub trait Machine<'mir, 'tcx>: Sized {
 
     /// Whether, when checking alignment, we should `force_int` and thus support
     /// custom alignment logic based on whatever the integer address happens to be.
+    ///
+    /// Requires `PointerTag::OFFSET_IS_ADDR` to be true.
     fn force_int_for_alignment_check(ecx: &InterpCx<'mir, 'tcx, Self>) -> bool;
 
     /// Whether to enforce the validity invariant
@@ -285,11 +291,14 @@ pub trait Machine<'mir, 'tcx>: Sized {
         addr: u64,
     ) -> Pointer<Option<Self::PointerTag>>;
 
-    /// Convert a pointer with provenance into an allocation-offset pair.
+    /// Convert a pointer with provenance into an allocation-offset pair
+    /// and extra provenance info.
+    ///
+    /// The returned `AllocId` must be the same as `ptr.provenance.get_alloc_id()`.
     fn ptr_get_alloc(
         ecx: &InterpCx<'mir, 'tcx, Self>,
         ptr: Pointer<Self::PointerTag>,
-    ) -> (AllocId, Size);
+    ) -> (AllocId, Size, Self::TagExtra);
 
     /// Called to initialize the "extra" state of an allocation and make the pointers
     /// it contains (in relocations) tagged.  The way we construct allocations is
@@ -321,7 +330,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
         _tcx: TyCtxt<'tcx>,
         _machine: &Self,
         _alloc_extra: &Self::AllocExtra,
-        _tag: Self::PointerTag,
+        _tag: (AllocId, Self::TagExtra),
         _range: AllocRange,
     ) -> InterpResult<'tcx> {
         Ok(())
@@ -333,7 +342,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
         _tcx: TyCtxt<'tcx>,
         _machine: &mut Self,
         _alloc_extra: &mut Self::AllocExtra,
-        _tag: Self::PointerTag,
+        _tag: (AllocId, Self::TagExtra),
         _range: AllocRange,
     ) -> InterpResult<'tcx> {
         Ok(())
@@ -345,7 +354,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
         _tcx: TyCtxt<'tcx>,
         _machine: &mut Self,
         _alloc_extra: &mut Self::AllocExtra,
-        _tag: Self::PointerTag,
+        _tag: (AllocId, Self::TagExtra),
         _range: AllocRange,
     ) -> InterpResult<'tcx> {
         Ok(())
@@ -397,6 +406,8 @@ pub trait Machine<'mir, 'tcx>: Sized {
 // (CTFE and ConstProp) use the same instance.  Here, we share that code.
 pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {
     type PointerTag = AllocId;
+    type TagExtra = ();
+
     type ExtraFnVal = !;
 
     type MemoryMap =
@@ -474,9 +485,12 @@ pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {
     }
 
     #[inline(always)]
-    fn ptr_get_alloc(_ecx: &InterpCx<$mir, $tcx, Self>, ptr: Pointer<AllocId>) -> (AllocId, Size) {
+    fn ptr_get_alloc(
+        _ecx: &InterpCx<$mir, $tcx, Self>,
+        ptr: Pointer<AllocId>,
+    ) -> (AllocId, Size, Self::TagExtra) {
         // We know `offset` is relative to the allocation, so we can use `into_parts`.
         let (alloc_id, offset) = ptr.into_parts();
-        (alloc_id, offset)
+        (alloc_id, offset, ())
     }
 }
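
For a downstream machine, the shape of the new contract looks roughly like this. This is a minimal, non-authoritative sketch: `MyMachine`, `MyTag`, `SbTag`, and `stacked_borrows_read` are hypothetical stand-ins modeled on a Miri-style machine, and all other `Machine` items are elided.

```rust
// Hypothetical Miri-style machine: the pointer tag bundles an `AllocId`
// with extra provenance data (`SbTag` is a stand-in name).
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
struct MyTag { alloc_id: AllocId, sb: SbTag }

impl<'mir, 'tcx> Machine<'mir, 'tcx> for MyMachine {
    type PointerTag = MyTag;
    // The extra data handed to the memory access hooks.
    type TagExtra = SbTag;

    fn ptr_get_alloc(
        _ecx: &InterpCx<'mir, 'tcx, Self>,
        ptr: Pointer<MyTag>,
    ) -> (AllocId, Size, SbTag) {
        let (tag, offset) = ptr.into_parts();
        // The returned `AllocId` must agree with the tag's own notion of it.
        (tag.alloc_id, offset, tag.sb)
    }

    fn memory_read(
        _tcx: TyCtxt<'tcx>,
        _machine: &Self,
        alloc_extra: &Self::AllocExtra,
        (alloc_id, sb): (AllocId, Self::TagExtra),
        range: AllocRange,
    ) -> InterpResult<'tcx> {
        // The hook learns which allocation is accessed, plus the extra tag
        // data, without needing a full `Pointer` any more.
        alloc_extra.stacked_borrows_read(alloc_id, sb, range)
    }

    // ... remaining `Machine` items elided ...
}
```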
diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
index a165fa23f30ac..e8ee0fe6ea6ee 100644
--- a/compiler/rustc_const_eval/src/interpret/memory.rs
+++ b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -158,8 +158,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         &self,
         ptr: Pointer<AllocId>,
     ) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
-        // We know `offset` is relative to the allocation, so we can use `into_parts`.
-        let (alloc_id, offset) = ptr.into_parts();
+        let alloc_id = ptr.provenance;
         // We need to handle `extern static`.
         match self.tcx.get_global_alloc(alloc_id) {
             Some(GlobalAlloc::Static(def_id)) if self.tcx.is_thread_local_static(def_id) => {
@@ -171,7 +170,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             _ => {}
         }
         // And we need to get the tag.
-        Ok(M::tag_alloc_base_pointer(self, Pointer::new(alloc_id, offset)))
+        Ok(M::tag_alloc_base_pointer(self, ptr))
     }
 
     pub fn create_fn_alloc_ptr(
@@ -238,7 +237,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         new_align: Align,
         kind: MemoryKind<M::MemoryKind>,
     ) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
-        let (alloc_id, offset, ptr) = self.ptr_get_alloc_id(ptr)?;
+        let (alloc_id, offset, _tag) = self.ptr_get_alloc_id(ptr)?;
         if offset.bytes() != 0 {
             throw_ub_format!(
                 "reallocating {:?} which does not point to the beginning of an object",
@@ -255,14 +254,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         };
         // This will also call the access hooks.
         self.mem_copy(
-            ptr.into(),
+            ptr,
             Align::ONE,
             new_ptr.into(),
             Align::ONE,
             old_size.min(new_size),
             /*nonoverlapping*/ true,
         )?;
-        self.deallocate_ptr(ptr.into(), old_size_and_align, kind)?;
+        self.deallocate_ptr(ptr, old_size_and_align, kind)?;
 
         Ok(new_ptr)
     }
@@ -274,7 +273,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         old_size_and_align: Option<(Size, Align)>,
         kind: MemoryKind<M::MemoryKind>,
     ) -> InterpResult<'tcx> {
-        let (alloc_id, offset, ptr) = self.ptr_get_alloc_id(ptr)?;
+        let (alloc_id, offset, tag) = self.ptr_get_alloc_id(ptr)?;
         trace!("deallocating: {}", alloc_id);
 
         if offset.bytes() != 0 {
@@ -330,7 +329,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             *self.tcx,
             &mut self.machine,
             &mut alloc.extra,
-            ptr.provenance,
+            (alloc_id, tag),
             alloc_range(Size::ZERO, size),
         )?;
 
@@ -350,17 +349,17 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         ptr: Pointer<Option<M::PointerTag>>,
         size: Size,
         align: Align,
-    ) -> InterpResult<'tcx, Option<(AllocId, Size, Pointer<M::PointerTag>)>> {
+    ) -> InterpResult<'tcx, Option<(AllocId, Size, M::TagExtra)>> {
         let align = M::enforce_alignment(&self).then_some(align);
         self.check_and_deref_ptr(
             ptr,
             size,
             align,
             CheckInAllocMsg::MemoryAccessTest,
-            |alloc_id, offset, ptr| {
+            |alloc_id, offset, tag| {
                 let (size, align) =
                     self.get_alloc_size_and_align(alloc_id, AllocCheck::Dereferenceable)?;
-                Ok((size, align, (alloc_id, offset, ptr)))
+                Ok((size, align, (alloc_id, offset, tag)))
             },
         )
     }
@@ -401,11 +400,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         size: Size,
         align: Option<Align>,
         msg: CheckInAllocMsg,
-        alloc_size: impl FnOnce(
-            AllocId,
-            Size,
-            Pointer<M::PointerTag>,
-        ) -> InterpResult<'tcx, (Size, Align, T)>,
+        alloc_size: impl FnOnce(AllocId, Size, M::TagExtra) -> InterpResult<'tcx, (Size, Align, T)>,
     ) -> InterpResult<'tcx, Option<T>> {
         fn check_offset_align(offset: u64, align: Align) -> InterpResult<'static> {
             if offset % align.bytes() == 0 {
@@ -433,8 +428,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
                 }
                 None
             }
-            Ok((alloc_id, offset, ptr)) => {
-                let (alloc_size, alloc_align, ret_val) = alloc_size(alloc_id, offset, ptr)?;
+            Ok((alloc_id, offset, tag)) => {
+                let (alloc_size, alloc_align, ret_val) = alloc_size(alloc_id, offset, tag)?;
                 // Test bounds. This also ensures non-null.
                 // It is sufficient to check this for the end pointer. Also check for overflow!
                 if offset.checked_add(size, &self.tcx).map_or(true, |end| end > alloc_size) {
@@ -450,10 +445,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
                 // we want the error to be about the bounds.
                 if let Some(align) = align {
                     if M::force_int_for_alignment_check(self) {
-                        let addr = Scalar::from_pointer(ptr, &self.tcx)
-                            .to_machine_usize(&self.tcx)
-                            .expect("ptr-to-int cast for align check should never fail");
-                        check_offset_align(addr, align)?;
+                        // `force_int_for_alignment_check` can only be true if `OFFSET_IS_ADDR` is true.
+                        check_offset_align(ptr.addr().bytes(), align)?;
                     } else {
                         // Check allocation alignment and offset alignment.
                         if alloc_align.bytes() < align.bytes() {
@@ -569,14 +562,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             size,
             align,
             CheckInAllocMsg::MemoryAccessTest,
-            |alloc_id, offset, ptr| {
+            |alloc_id, offset, tag| {
                 let alloc = self.get_alloc_raw(alloc_id)?;
-                Ok((alloc.size(), alloc.align, (alloc_id, offset, ptr, alloc)))
+                Ok((alloc.size(), alloc.align, (alloc_id, offset, tag, alloc)))
             },
         )?;
-        if let Some((alloc_id, offset, ptr, alloc)) = ptr_and_alloc {
+        if let Some((alloc_id, offset, tag, alloc)) = ptr_and_alloc {
             let range = alloc_range(offset, size);
-            M::memory_read(*self.tcx, &self.machine, &alloc.extra, ptr.provenance, range)?;
+            M::memory_read(*self.tcx, &self.machine, &alloc.extra, (alloc_id, tag), range)?;
             Ok(Some(AllocRef { alloc, range, tcx: *self.tcx, alloc_id }))
         } else {
             // Even in this branch we have to be sure that we actually access the allocation, in
@@ -631,13 +624,13 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         align: Align,
     ) -> InterpResult<'tcx, Option<AllocRefMut<'a, 'tcx, M::PointerTag, M::AllocExtra>>> {
         let parts = self.get_ptr_access(ptr, size, align)?;
-        if let Some((alloc_id, offset, ptr)) = parts {
+        if let Some((alloc_id, offset, tag)) = parts {
             let tcx = *self.tcx;
             // FIXME: can we somehow avoid looking up the allocation twice here?
             // We cannot call `get_raw_mut` inside `check_and_deref_ptr` as that would duplicate `&mut self`.
             let (alloc, machine) = self.get_alloc_raw_mut(alloc_id)?;
             let range = alloc_range(offset, size);
-            M::memory_written(tcx, machine, &mut alloc.extra, ptr.provenance, range)?;
+            M::memory_written(tcx, machine, &mut alloc.extra, (alloc_id, tag), range)?;
             Ok(Some(AllocRefMut { alloc, range, tcx, alloc_id }))
         } else {
             Ok(None)
@@ -732,7 +725,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         ptr: Pointer<Option<M::PointerTag>>,
     ) -> InterpResult<'tcx, FnVal<'tcx, M::ExtraFnVal>> {
         trace!("get_fn({:?})", ptr);
-        let (alloc_id, offset, _ptr) = self.ptr_get_alloc_id(ptr)?;
+        let (alloc_id, offset, _tag) = self.ptr_get_alloc_id(ptr)?;
         if offset.bytes() != 0 {
             throw_ub!(InvalidFunctionPointer(Pointer::new(alloc_id, offset)))
         }
@@ -892,8 +885,11 @@ impl<'tcx, 'a, Tag: Provenance, Extra> AllocRefMut<'a, 'tcx, Tag, Extra> {
     }
 
     /// Mark the entire referenced range as uninitialized
-    pub fn write_uninit(&mut self) {
-        self.alloc.mark_init(self.range, false);
+    pub fn write_uninit(&mut self) -> InterpResult<'tcx> {
+        Ok(self
+            .alloc
+            .write_uninit(&self.tcx, self.range)
+            .map_err(|e| e.to_interp_error(self.alloc_id))?)
     }
 }
 
@@ -1009,16 +1005,16 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         // and once below to get the underlying `&[mut] Allocation`.
 
         // Source alloc preparations and access hooks.
-        let Some((src_alloc_id, src_offset, src)) = src_parts else {
+        let Some((src_alloc_id, src_offset, src_tag)) = src_parts else {
             // Zero-sized *source*; that means dst is also zero-sized and we have nothing to do.
             return Ok(());
         };
         let src_alloc = self.get_alloc_raw(src_alloc_id)?;
         let src_range = alloc_range(src_offset, size);
-        M::memory_read(*tcx, &self.machine, &src_alloc.extra, src.provenance, src_range)?;
+        M::memory_read(*tcx, &self.machine, &src_alloc.extra, (src_alloc_id, src_tag), src_range)?;
         // We need the `dest` ptr for the next operation, so we get it now.
         // We already did the source checks and called the hooks so we are good to return early.
-        let Some((dest_alloc_id, dest_offset, dest)) = dest_parts else {
+        let Some((dest_alloc_id, dest_offset, dest_tag)) = dest_parts else {
             // Zero-sized *destination*.
             return Ok(());
         };
@@ -1040,7 +1036,13 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         // Destination alloc preparations and access hooks.
         let (dest_alloc, extra) = self.get_alloc_raw_mut(dest_alloc_id)?;
         let dest_range = alloc_range(dest_offset, size * num_copies);
-        M::memory_written(*tcx, extra, &mut dest_alloc.extra, dest.provenance, dest_range)?;
+        M::memory_written(
+            *tcx,
+            extra,
+            &mut dest_alloc.extra,
+            (dest_alloc_id, dest_tag),
+            dest_range,
+        )?;
         let dest_bytes = dest_alloc
             .get_bytes_mut_ptr(&tcx, dest_range)
             .map_err(|e| e.to_interp_error(dest_alloc_id))?
@@ -1053,8 +1055,10 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             // This also avoids writing to the target bytes so that the backing allocation is never
             // touched if the bytes stay uninitialized for the whole interpreter execution. On contemporary
             // operating systems this can avoid physically allocating the page.
-            dest_alloc.mark_init(dest_range, false); // `Size` multiplication
-            dest_alloc.mark_relocation_range(relocations);
+            dest_alloc
+                .write_uninit(&tcx, dest_range)
+                .map_err(|e| e.to_interp_error(dest_alloc_id))?;
+            // We can forget about the relocations; none of this is initialized anyway.
             return Ok(());
         }
 
@@ -1159,11 +1163,11 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
     pub fn ptr_try_get_alloc_id(
         &self,
         ptr: Pointer<Option<M::PointerTag>>,
-    ) -> Result<(AllocId, Size, Pointer<M::PointerTag>), u64> {
+    ) -> Result<(AllocId, Size, M::TagExtra), u64> {
         match ptr.into_pointer_or_addr() {
             Ok(ptr) => {
-                let (alloc_id, offset) = M::ptr_get_alloc(self, ptr);
-                Ok((alloc_id, offset, ptr))
+                let (alloc_id, offset, extra) = M::ptr_get_alloc(self, ptr);
+                Ok((alloc_id, offset, extra))
             }
             Err(addr) => Err(addr.bytes()),
         }
@@ -1174,7 +1178,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
     pub fn ptr_get_alloc_id(
         &self,
         ptr: Pointer<Option<M::PointerTag>>,
-    ) -> InterpResult<'tcx, (AllocId, Size, Pointer<M::PointerTag>)> {
+    ) -> InterpResult<'tcx, (AllocId, Size, M::TagExtra)> {
         self.ptr_try_get_alloc_id(ptr).map_err(|offset| {
             err_ub!(DanglingIntPointer(offset, CheckInAllocMsg::InboundsTest)).into()
         })
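
Call sites now destructure the machine's `TagExtra` instead of a full `Pointer`. A hedged sketch of the two outcomes (names illustrative):

```rust
// Sketch: `ptr_try_get_alloc_id` separates real pointers from raw addresses.
match ecx.ptr_try_get_alloc_id(ptr) {
    Ok((alloc_id, offset, _extra)) => {
        // Pointer with provenance: we know the allocation and the offset into it.
    }
    Err(addr) => {
        // No provenance: `addr` is the absolute address. `ptr_get_alloc_id`
        // turns this case into a `DanglingIntPointer` error.
    }
}
```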
diff --git a/compiler/rustc_const_eval/src/interpret/place.rs b/compiler/rustc_const_eval/src/interpret/place.rs
index 31da4522a1fda..e4660fe090ce5 100644
--- a/compiler/rustc_const_eval/src/interpret/place.rs
+++ b/compiler/rustc_const_eval/src/interpret/place.rs
@@ -823,7 +823,7 @@ where
             // Zero-sized access
             return Ok(());
         };
-        alloc.write_uninit();
+        alloc.write_uninit()?;
         Ok(())
     }
 
diff --git a/compiler/rustc_const_eval/src/interpret/validity.rs b/compiler/rustc_const_eval/src/interpret/validity.rs
index 4a0aa41de739b..71d29be97d5ec 100644
--- a/compiler/rustc_const_eval/src/interpret/validity.rs
+++ b/compiler/rustc_const_eval/src/interpret/validity.rs
@@ -432,7 +432,7 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
         if let Some(ref mut ref_tracking) = self.ref_tracking {
             // Proceed recursively even for ZST, no reason to skip them!
             // `!` is a ZST and we want to validate it.
-            if let Ok((alloc_id, _offset, _ptr)) = self.ecx.ptr_try_get_alloc_id(place.ptr) {
+            if let Ok((alloc_id, _offset, _tag)) = self.ecx.ptr_try_get_alloc_id(place.ptr) {
                 // Special handling for pointers to statics (irrespective of their type).
                 let alloc_kind = self.ecx.tcx.get_global_alloc(alloc_id);
                 if let Some(GlobalAlloc::Static(did)) = alloc_kind {
diff --git a/compiler/rustc_error_messages/src/lib.rs b/compiler/rustc_error_messages/src/lib.rs
index de0dd18cc6ec5..e1e0ed7222d55 100644
--- a/compiler/rustc_error_messages/src/lib.rs
+++ b/compiler/rustc_error_messages/src/lib.rs
@@ -338,18 +338,12 @@ impl MultiSpan {
 
     /// Returns `true` if any of the primary spans are displayable.
     pub fn has_primary_spans(&self) -> bool {
-        self.primary_spans.iter().any(|sp| !sp.is_dummy())
+        !self.is_dummy()
     }
 
     /// Returns `true` if this contains only a dummy primary span with any hygienic context.
     pub fn is_dummy(&self) -> bool {
-        let mut is_dummy = true;
-        for span in &self.primary_spans {
-            if !span.is_dummy() {
-                is_dummy = false;
-            }
-        }
-        is_dummy
+        self.primary_spans.iter().all(|sp| sp.is_dummy())
     }
 
     /// Replaces all occurrences of one Span with another. Used to move `Span`s in areas that don't
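
The rewrite also pins down the empty-list edge case: `all` on an empty iterator is vacuously true, so an empty `MultiSpan` counts as dummy and `has_primary_spans` stays false, matching the old `any`-based code. A plain-Rust illustration:

```rust
// Each element stands for "this span is dummy".
let spans: Vec<bool> = Vec::new();
// `all` is vacuously true on an empty iterator (is_dummy == true) ...
assert!(spans.iter().all(|&dummy| dummy));
// ... so the old `any`-based formulation agrees (has_primary_spans == false).
assert!(!spans.iter().any(|&dummy| !dummy));
```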
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs
index 438f356f072c6..7723f7a64f769 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -269,7 +269,7 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
     /// `get_bytes_with_uninit_and_ptr` instead,
     ///
     /// This function also guarantees that the resulting pointer will remain stable
-    /// even when new allocations are pushed to the `HashMap`. `copy_repeatedly` relies
+    /// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies
     /// on that.
     ///
     /// It is the caller's responsibility to check bounds and alignment beforehand.
@@ -429,8 +429,7 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
         let val = match val {
             ScalarMaybeUninit::Scalar(scalar) => scalar,
             ScalarMaybeUninit::Uninit => {
-                self.mark_init(range, false);
-                return Ok(());
+                return self.write_uninit(cx, range);
             }
         };
 
@@ -455,6 +454,13 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
 
         Ok(())
     }
+
+    /// Write "uninit" to the given memory range.
+    pub fn write_uninit(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
+        self.mark_init(range, false);
+        self.clear_relocations(cx, range)?;
+        Ok(())
+    }
 }
 
 /// Relocations.
@@ -561,8 +567,10 @@ impl<Tag> Deref for Relocations<Tag> {
 }
 
 /// A partial, owned list of relocations to transfer into another allocation.
+///
+/// Offsets are already adjusted to the destination allocation.
 pub struct AllocationRelocations<Tag> {
-    relative_relocations: Vec<(Size, Tag)>,
+    dest_relocations: Vec<(Size, Tag)>,
 }
 
 impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
@@ -575,12 +583,17 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
     ) -> AllocationRelocations<Tag> {
         let relocations = self.get_relocations(cx, src);
         if relocations.is_empty() {
-            return AllocationRelocations { relative_relocations: Vec::new() };
+            return AllocationRelocations { dest_relocations: Vec::new() };
         }
 
         let size = src.size;
         let mut new_relocations = Vec::with_capacity(relocations.len() * (count as usize));
 
+        // If `count` is large, this is rather wasteful -- we are allocating a big array here, which
+        // is mostly filled with redundant information since it's just N copies of the same `Tag`s
+        // at slightly adjusted offsets. The reason we do this is so that in `mark_relocation_range`
+        // we can use `insert_presorted`. That wouldn't work with an `Iterator` that just produces
+        // the right sequence of relocations for all N copies.
         for i in 0..count {
             new_relocations.extend(relocations.iter().map(|&(offset, reloc)| {
                 // compute offset for current repetition
@@ -593,14 +606,17 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
             }));
         }
 
-        AllocationRelocations { relative_relocations: new_relocations }
+        AllocationRelocations { dest_relocations: new_relocations }
     }
 
     /// Applies a relocation copy.
     /// The affected range, as defined in the parameters to `prepare_relocation_copy`, is expected
     /// to be clear of relocations.
+    ///
+    /// This is dangerous to use as it can violate internal `Allocation` invariants!
+    /// It only exists to support an efficient implementation of `mem_copy_repeatedly`.
     pub fn mark_relocation_range(&mut self, relocations: AllocationRelocations<Tag>) {
-        self.relocations.0.insert_presorted(relocations.relative_relocations);
+        self.relocations.0.insert_presorted(relocations.dest_relocations);
     }
 }
 
@@ -1056,7 +1072,7 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
         })
     }
 
-    pub fn mark_init(&mut self, range: AllocRange, is_init: bool) {
+    fn mark_init(&mut self, range: AllocRange, is_init: bool) {
         if range.size.bytes() == 0 {
             return;
         }
@@ -1118,6 +1134,9 @@ impl<Tag, Extra> Allocation<Tag, Extra> {
     }
 
     /// Applies multiple instances of the run-length encoding to the initialization mask.
+    ///
+    /// This is dangerous to use as it can violate internal `Allocation` invariants!
+    /// It only exists to support an efficient implementation of `mem_copy_repeatedly`.
     pub fn mark_compressed_init_range(
         &mut self,
         defined: &InitMaskCompressed,
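
This is why `mark_init` is demoted to a private helper and `write_uninit` becomes the public entry point: de-initializing a range must also clear any relocations overlapping it, or a later read could resurrect a stale pointer out of nominally uninitialized bytes. A sketch of the invariant as a debug check, assuming these accessors are visible at the call site:

```rust
// After write_uninit, a non-empty `range` must neither be initialized nor
// carry relocations.
alloc.write_uninit(cx, range)?;
debug_assert!(alloc.init_mask().is_range_initialized(range.start, range.end()).is_err());
debug_assert!(alloc.get_relocations(cx, range).is_empty());
```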
diff --git a/compiler/rustc_middle/src/mir/interpret/pointer.rs b/compiler/rustc_middle/src/mir/interpret/pointer.rs
index 813c0912f5396..c71aea417eca0 100644
--- a/compiler/rustc_middle/src/mir/interpret/pointer.rs
+++ b/compiler/rustc_middle/src/mir/interpret/pointer.rs
@@ -163,6 +163,9 @@ pub struct Pointer<Tag = AllocId> {
 }
 
 static_assert_size!(Pointer, 16);
+// `Option<Tag>` pointers are also passed around quite a bit
+// (but not stored in permanent machine state).
+static_assert_size!(Pointer<Option<AllocId>>, 16);
 
 // We want the `Debug` output to be readable as it is used by `derive(Debug)` for
 // all the Miri types.
@@ -198,12 +201,26 @@ impl<Tag> From<Pointer<Tag>> for Pointer<Option<Tag>> {
 }
 
 impl<Tag> Pointer<Option<Tag>> {
+    /// Convert this pointer that *might* have a tag into a pointer that *definitely* has a tag, or
+    /// an absolute address.
+    ///
+    /// This is rarely what you want; call `ptr_try_get_alloc_id` instead.
     pub fn into_pointer_or_addr(self) -> Result<Pointer<Tag>, Size> {
         match self.provenance {
             Some(tag) => Ok(Pointer::new(tag, self.offset)),
             None => Err(self.offset),
         }
     }
+
+    /// Returns the absolute address the pointer points to.
+    /// Only works if `Tag::OFFSET_IS_ADDR` is true!
+    pub fn addr(self) -> Size
+    where
+        Tag: Provenance,
+    {
+        assert!(Tag::OFFSET_IS_ADDR);
+        self.offset
+    }
 }
 
 impl<Tag> Pointer<Option<Tag>> {
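
A sketch of when to reach for which accessor, assuming a provenance type with `OFFSET_IS_ADDR == true` (as in Miri's raw-address mode):

```rust
// Total: the offset *is* the absolute address, with or without provenance.
let addr: Size = ptr.addr();

// Partial: the caller must handle the provenance-less case explicitly.
match ptr.into_pointer_or_addr() {
    Ok(tagged) => { /* pointer with provenance */ }
    Err(addr) => { /* plain absolute address */ }
}
```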
diff --git a/compiler/rustc_middle/src/mir/pretty.rs b/compiler/rustc_middle/src/mir/pretty.rs
index 69dac03883940..b7f695da544f1 100644
--- a/compiler/rustc_middle/src/mir/pretty.rs
+++ b/compiler/rustc_middle/src/mir/pretty.rs
@@ -851,6 +851,7 @@ fn write_allocation_bytes<'tcx, Tag: Provenance, Extra>(
         }
         if let Some(&tag) = alloc.relocations().get(&i) {
             // Memory with a relocation must be defined
+            assert!(alloc.init_mask().is_range_initialized(i, i + ptr_size).is_ok());
             let j = i.bytes_usize();
             let offset = alloc
                 .inspect_with_uninit_and_ptr_outside_interpreter(j..j + ptr_size.bytes_usize());
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index d9bada295894f..e3ce8105a8b47 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -803,6 +803,7 @@ symbols! {
         keyword,
         kind,
         kreg,
+        kreg0,
         label,
         label_break_value,
         lang,
diff --git a/compiler/rustc_target/src/asm/mod.rs b/compiler/rustc_target/src/asm/mod.rs
index 5bc4b566daf67..6bc807c7c4421 100644
--- a/compiler/rustc_target/src/asm/mod.rs
+++ b/compiler/rustc_target/src/asm/mod.rs
@@ -893,7 +893,7 @@ impl InlineAsmClobberAbi {
 
                     xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
 
-                    k1, k2, k3, k4, k5, k6, k7,
+                    k0, k1, k2, k3, k4, k5, k6, k7,
 
                     mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7,
                     st0, st1, st2, st3, st4, st5, st6, st7,
@@ -908,7 +908,7 @@ impl InlineAsmClobberAbi {
                     zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23,
                     zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31,
 
-                    k1, k2, k3, k4, k5, k6, k7,
+                    k0, k1, k2, k3, k4, k5, k6, k7,
 
                     mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7,
                     st0, st1, st2, st3, st4, st5, st6, st7,
@@ -927,7 +927,7 @@ impl InlineAsmClobberAbi {
                     zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23,
                     zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31,
 
-                    k1, k2, k3, k4, k5, k6, k7,
+                    k0, k1, k2, k3, k4, k5, k6, k7,
 
                     mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7,
                     st0, st1, st2, st3, st4, st5, st6, st7,
diff --git a/compiler/rustc_target/src/asm/x86.rs b/compiler/rustc_target/src/asm/x86.rs
index 7c136a475486b..854674c7f2fa7 100644
--- a/compiler/rustc_target/src/asm/x86.rs
+++ b/compiler/rustc_target/src/asm/x86.rs
@@ -14,6 +14,7 @@ def_reg_class! {
         ymm_reg,
         zmm_reg,
         kreg,
+        kreg0,
         mmx_reg,
         x87_reg,
     }
@@ -38,7 +39,7 @@ impl X86InlineAsmRegClass {
             }
             Self::reg_byte => &[],
             Self::xmm_reg | Self::ymm_reg | Self::zmm_reg => &['x', 'y', 'z'],
-            Self::kreg => &[],
+            Self::kreg | Self::kreg0 => &[],
             Self::mmx_reg | Self::x87_reg => &[],
         }
     }
@@ -77,7 +78,7 @@ impl X86InlineAsmRegClass {
                 256 => Some(('y', "ymm0")),
                 _ => Some(('x', "xmm0")),
             },
-            Self::kreg => None,
+            Self::kreg | Self::kreg0 => None,
             Self::mmx_reg | Self::x87_reg => None,
         }
     }
@@ -95,7 +96,7 @@ impl X86InlineAsmRegClass {
             Self::xmm_reg => Some(('x', "xmm0")),
             Self::ymm_reg => Some(('y', "ymm0")),
             Self::zmm_reg => Some(('z', "zmm0")),
-            Self::kreg => None,
+            Self::kreg | Self::kreg0 => None,
             Self::mmx_reg | Self::x87_reg => None,
         }
     }
@@ -132,6 +133,7 @@ impl X86InlineAsmRegClass {
                 avx512f: I8, I16;
                 avx512bw: I32, I64;
             },
+            Self::kreg0 => &[],
             Self::mmx_reg | Self::x87_reg => &[],
         }
     }
@@ -294,6 +296,7 @@ def_regs! {
         zmm29: zmm_reg = ["zmm29", "xmm29", "ymm29"] % x86_64_only,
         zmm30: zmm_reg = ["zmm30", "xmm30", "ymm30"] % x86_64_only,
         zmm31: zmm_reg = ["zmm31", "xmm31", "ymm31"] % x86_64_only,
+        k0: kreg0 = ["k0"],
         k1: kreg = ["k1"],
         k2: kreg = ["k2"],
         k3: kreg = ["k3"],
@@ -323,8 +326,6 @@ def_regs! {
             "the stack pointer cannot be used as an operand for inline asm",
         #error = ["ip", "eip", "rip"] =>
             "the instruction pointer cannot be used as an operand for inline asm",
-        #error = ["k0"] =>
-            "the k0 AVX mask register cannot be used as an operand for inline asm",
     }
 }
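
With `k0` moved from a hard error to the clobber-only class `kreg0`, it can now be named as a clobber even though it still cannot carry an operand (in EVEX encodings, mask operand `k0` means "no masking", so a value placed there is not generally usable). A hedged example of what this enables, assuming an x86-64 target:

```rust
use std::arch::asm;

// `k0` as a discarded output, i.e. a clobber -- this now compiles.
unsafe fn clobbers_k0() {
    asm!("kxorw k0, k0, k0", out("k0") _);
}

// Using it as an input still fails, now with the register-class error:
//     error: register class `kreg0` can only be used as a clobber,
//            not as an input or output
// unsafe fn reads_k0(x: u16) { asm!("", in("k0") x); }
```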
 
diff --git a/library/std/src/sys/unix/futex.rs b/library/std/src/sys/unix/futex.rs
index 62760373a6aff..c12ee169e797a 100644
--- a/library/std/src/sys/unix/futex.rs
+++ b/library/std/src/sys/unix/futex.rs
@@ -52,25 +52,6 @@ pub fn futex_wait(futex: &AtomicU32, expected: u32, timeout: Option<Duration>) -
     }
 }
 
-#[cfg(target_os = "emscripten")]
-pub fn futex_wait(futex: &AtomicU32, expected: u32, timeout: Option<Duration>) {
-    extern "C" {
-        fn emscripten_futex_wait(
-            addr: *const AtomicU32,
-            val: libc::c_uint,
-            max_wait_ms: libc::c_double,
-        ) -> libc::c_int;
-    }
-
-    unsafe {
-        emscripten_futex_wait(
-            futex,
-            expected,
-            timeout.map_or(crate::f64::INFINITY, |d| d.as_secs_f64() * 1000.0),
-        );
-    }
-}
-
 /// Wake up one thread that's blocked on futex_wait on this futex.
 ///
 /// Returns true if this actually woke up such a thread,
@@ -101,10 +82,32 @@ pub fn futex_wake_all(futex: &AtomicU32) {
 }
 
 #[cfg(target_os = "emscripten")]
-pub fn futex_wake(futex: &AtomicU32) -> bool {
-    extern "C" {
-        fn emscripten_futex_wake(addr: *const AtomicU32, count: libc::c_int) -> libc::c_int;
+extern "C" {
+    fn emscripten_futex_wake(addr: *const AtomicU32, count: libc::c_int) -> libc::c_int;
+    fn emscripten_futex_wait(
+        addr: *const AtomicU32,
+        val: libc::c_uint,
+        max_wait_ms: libc::c_double,
+    ) -> libc::c_int;
+}
+
+#[cfg(target_os = "emscripten")]
+pub fn futex_wait(futex: &AtomicU32, expected: u32, timeout: Option<Duration>) -> bool {
+    unsafe {
+        emscripten_futex_wait(
+            futex,
+            expected,
+            timeout.map_or(f64::INFINITY, |d| d.as_secs_f64() * 1000.0),
+        ) != -libc::ETIMEDOUT
     }
+}
 
+#[cfg(target_os = "emscripten")]
+pub fn futex_wake(futex: &AtomicU32) -> bool {
     unsafe { emscripten_futex_wake(futex, 1) > 0 }
 }
+
+#[cfg(target_os = "emscripten")]
+pub fn futex_wake_all(futex: &AtomicU32) {
+    unsafe { emscripten_futex_wake(futex, i32::MAX) };
+}
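
A sketch of how the lock implementations in `sys/unix/locks` consume this pair; the caller below is hypothetical, and `futex_wait`/`futex_wake` are assumed in scope (they are private to `std::sys`):

```rust
use std::sync::atomic::{AtomicU32, Ordering};

// Hypothetical caller: block until another thread publishes a nonzero value.
fn wait_until_nonzero(futex: &AtomicU32) {
    while futex.load(Ordering::Acquire) == 0 {
        // The expected value is re-checked by the futex itself, avoiding a
        // missed-wakeup race; with `timeout: None` any early return is a
        // spurious wake, which the loop absorbs.
        futex_wait(futex, 0, None);
    }
}

fn publish_and_wake(futex: &AtomicU32) {
    futex.store(1, Ordering::Release);
    futex_wake(futex); // wake at most one waiter
}
```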
diff --git a/library/std/src/sys/unix/locks/mod.rs b/library/std/src/sys/unix/locks/mod.rs
index 17796f8894b5d..3e39c8b9b23e7 100644
--- a/library/std/src/sys/unix/locks/mod.rs
+++ b/library/std/src/sys/unix/locks/mod.rs
@@ -2,6 +2,7 @@ cfg_if::cfg_if! {
     if #[cfg(any(
         target_os = "linux",
         target_os = "android",
+        all(target_os = "emscripten", target_feature = "atomics"),
     ))] {
         mod futex;
         mod futex_rwlock;
diff --git a/src/librustdoc/html/format.rs b/src/librustdoc/html/format.rs
index 6954e2363f5f0..fd6d675dc8b8c 100644
--- a/src/librustdoc/html/format.rs
+++ b/src/librustdoc/html/format.rs
@@ -370,7 +370,8 @@ crate fn print_where_clause<'a, 'tcx: 'a>(
             clause = clause.replace("<br>", &format!("<br>{}", padding));
             clause.insert_str(0, &"&nbsp;".repeat(indent.saturating_sub(1)));
             if !end_newline {
-                clause.insert_str(0, "<br>");
+                // insert the <br> after the leading space when there is no indent, and before the indentation otherwise
+                clause.insert_str(if indent == 0 { 1 } else { 0 }, "<br>");
             }
         }
         write!(f, "{}", clause)
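
At `indent == 0` the clause string begins with a single space, so the break has to land after that space rather than at position 0. A minimal illustration of the indexing, with hypothetical clause contents:

```rust
// Hypothetical clause text; only the leading space matters.
let mut clause = String::from(" where T: MyTrait");
clause.insert_str(1, "<br>"); // indent == 0: break goes after the space
assert_eq!(clause, " <br>where T: MyTrait");
```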
diff --git a/src/test/codegen/asm-clobber_abi.rs b/src/test/codegen/asm-clobber_abi.rs
index 69e3527026655..a87152e0321b9 100644
--- a/src/test/codegen/asm-clobber_abi.rs
+++ b/src/test/codegen/asm-clobber_abi.rs
@@ -6,21 +6,21 @@
 use std::arch::asm;
 
 // CHECK-LABEL: @clobber_sysv64
-// CHECK: ={ax},={cx},={dx},={si},={di},={r8},={r9},={r10},={r11},={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},={xmm8},={xmm9},={xmm10},={xmm11},={xmm12},={xmm13},={xmm14},={xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{k1},~{k2},~{k3},~{k4},~{k5},~{k6},~{k7},~{st},~{st(1)},~{st(2)},~{st(3)},~{st(4)},~{st(5)},~{st(6)},~{st(7)},~{dirflag},~{fpsr},~{flags},~{memory}
+// CHECK: ={ax},={cx},={dx},={si},={di},={r8},={r9},={r10},={r11},={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},={xmm8},={xmm9},={xmm10},={xmm11},={xmm12},={xmm13},={xmm14},={xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{k0},~{k1},~{k2},~{k3},~{k4},~{k5},~{k6},~{k7},~{st},~{st(1)},~{st(2)},~{st(3)},~{st(4)},~{st(5)},~{st(6)},~{st(7)},~{dirflag},~{fpsr},~{flags},~{memory}
 #[no_mangle]
 pub unsafe fn clobber_sysv64() {
     asm!("", clobber_abi("sysv64"));
 }
 
 // CHECK-LABEL: @clobber_win64
-// CHECK: ={ax},={cx},={dx},={r8},={r9},={r10},={r11},={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},={xmm8},={xmm9},={xmm10},={xmm11},={xmm12},={xmm13},={xmm14},={xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{k1},~{k2},~{k3},~{k4},~{k5},~{k6},~{k7},~{st},~{st(1)},~{st(2)},~{st(3)},~{st(4)},~{st(5)},~{st(6)},~{st(7)},~{dirflag},~{fpsr},~{flags},~{memory}
+// CHECK: ={ax},={cx},={dx},={r8},={r9},={r10},={r11},={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},={xmm8},={xmm9},={xmm10},={xmm11},={xmm12},={xmm13},={xmm14},={xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{k0},~{k1},~{k2},~{k3},~{k4},~{k5},~{k6},~{k7},~{st},~{st(1)},~{st(2)},~{st(3)},~{st(4)},~{st(5)},~{st(6)},~{st(7)},~{dirflag},~{fpsr},~{flags},~{memory}
 #[no_mangle]
 pub unsafe fn clobber_win64() {
     asm!("", clobber_abi("win64"));
 }
 
 // CHECK-LABEL: @clobber_sysv64
-// CHECK: =&{dx},={ax},={cx},={si},={di},={r8},={r9},={r10},={r11},={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},={xmm8},={xmm9},={xmm10},={xmm11},={xmm12},={xmm13},={xmm14},={xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{k1},~{k2},~{k3},~{k4},~{k5},~{k6},~{k7},~{st},~{st(1)},~{st(2)},~{st(3)},~{st(4)},~{st(5)},~{st(6)},~{st(7)},~{dirflag},~{fpsr},~{flags},~{memory}
+// CHECK: =&{dx},={ax},={cx},={si},={di},={r8},={r9},={r10},={r11},={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},={xmm8},={xmm9},={xmm10},={xmm11},={xmm12},={xmm13},={xmm14},={xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{k0},~{k1},~{k2},~{k3},~{k4},~{k5},~{k6},~{k7},~{st},~{st(1)},~{st(2)},~{st(3)},~{st(4)},~{st(5)},~{st(6)},~{st(7)},~{dirflag},~{fpsr},~{flags},~{memory}
 #[no_mangle]
 pub unsafe fn clobber_sysv64_edx() {
     let foo: i32;
@@ -28,7 +28,7 @@ pub unsafe fn clobber_sysv64_edx() {
 }
 
 // CHECK-LABEL: @clobber_win64
-// CHECK: =&{dx},={ax},={cx},={r8},={r9},={r10},={r11},={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},={xmm8},={xmm9},={xmm10},={xmm11},={xmm12},={xmm13},={xmm14},={xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{k1},~{k2},~{k3},~{k4},~{k5},~{k6},~{k7},~{st},~{st(1)},~{st(2)},~{st(3)},~{st(4)},~{st(5)},~{st(6)},~{st(7)},~{dirflag},~{fpsr},~{flags},~{memory}
+// CHECK: =&{dx},={ax},={cx},={r8},={r9},={r10},={r11},={xmm0},={xmm1},={xmm2},={xmm3},={xmm4},={xmm5},={xmm6},={xmm7},={xmm8},={xmm9},={xmm10},={xmm11},={xmm12},={xmm13},={xmm14},={xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{k0},~{k1},~{k2},~{k3},~{k4},~{k5},~{k6},~{k7},~{st},~{st(1)},~{st(2)},~{st(3)},~{st(4)},~{st(5)},~{st(6)},~{st(7)},~{dirflag},~{fpsr},~{flags},~{memory}
 #[no_mangle]
 pub unsafe fn clobber_win64_edx() {
     let foo: i32;
diff --git a/src/test/rustdoc/where.SWhere_Simd_item-decl.html b/src/test/rustdoc/where.SWhere_Simd_item-decl.html
new file mode 100644
index 0000000000000..0133bcaeb6673
--- /dev/null
+++ b/src/test/rustdoc/where.SWhere_Simd_item-decl.html
@@ -0,0 +1 @@
+<div class="docblock item-decl"><pre class="rust struct"><code>pub struct Simd&lt;T&gt;(_) <br /><span class="where">where<br />&#160;&#160;&#160;&#160;T: <a class="trait" href="trait.MyTrait.html" title="trait foo::MyTrait">MyTrait</a></span>;</code></pre></div>
\ No newline at end of file
diff --git a/src/test/rustdoc/where.SWhere_TraitWhere_item-decl.html b/src/test/rustdoc/where.SWhere_TraitWhere_item-decl.html
new file mode 100644
index 0000000000000..54026ff034e00
--- /dev/null
+++ b/src/test/rustdoc/where.SWhere_TraitWhere_item-decl.html
@@ -0,0 +1,3 @@
+<div class="docblock item-decl"><pre class="rust trait"><code>pub trait TraitWhere {
+    type <a href="#associatedtype.Item" class="associatedtype">Item</a>&lt;'a&gt;<br />&#160;&#160;&#160; <span class="where">where<br />&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;Self: 'a</span>;
+}</code></pre></div>
\ No newline at end of file
diff --git a/src/test/rustdoc/where.rs b/src/test/rustdoc/where.rs
index 549cfff96cb6d..50a5722fbaff6 100644
--- a/src/test/rustdoc/where.rs
+++ b/src/test/rustdoc/where.rs
@@ -1,3 +1,4 @@
+#![feature(generic_associated_types)]
 #![crate_name = "foo"]
 
 pub trait MyTrait { fn dummy(&self) { } }
@@ -19,6 +20,18 @@ impl<D> Delta<D> where D: MyTrait {
 
 pub struct Echo<E>(E);
 
+// @has 'foo/struct.Simd.html'
+// @snapshot SWhere_Simd_item-decl - '//div[@class="docblock item-decl"]'
+pub struct Simd<T>([T; 1])
+where
+    T: MyTrait;
+
+// @has 'foo/trait.TraitWhere.html'
+// @snapshot SWhere_TraitWhere_item-decl - '//div[@class="docblock item-decl"]'
+pub trait TraitWhere {
+    type Item<'a> where Self: 'a;
+}
+
 // @has foo/struct.Echo.html '//*[@class="impl has-srclink"]//h3[@class="code-header in-band"]' \
 //          "impl<E> MyTrait for Echo<E> where E: MyTrait"
 // @has foo/trait.MyTrait.html '//*[@id="implementors-list"]//h3[@class="code-header in-band"]' \
diff --git a/src/test/ui/asm/x86_64/bad-reg.rs b/src/test/ui/asm/x86_64/bad-reg.rs
index 4c4ce8b5e9e49..272372ebedc1d 100644
--- a/src/test/ui/asm/x86_64/bad-reg.rs
+++ b/src/test/ui/asm/x86_64/bad-reg.rs
@@ -29,13 +29,13 @@ fn main() {
         //~^ ERROR invalid register `rsp`: the stack pointer cannot be used as an operand
         asm!("", in("ip") foo);
         //~^ ERROR invalid register `ip`: the instruction pointer cannot be used as an operand
-        asm!("", in("k0") foo);
-        //~^ ERROR invalid register `k0`: the k0 AVX mask register cannot be used as an operand
 
         asm!("", in("st(2)") foo);
         //~^ ERROR register class `x87_reg` can only be used as a clobber, not as an input or output
         asm!("", in("mm0") foo);
         //~^ ERROR register class `mmx_reg` can only be used as a clobber, not as an input or output
+        asm!("", in("k0") foo);
+        //~^ ERROR register class `kreg0` can only be used as a clobber, not as an input or output
         asm!("", out("st(2)") _);
         asm!("", out("mm0") _);
         asm!("{}", in(x87_reg) foo);
diff --git a/src/test/ui/asm/x86_64/bad-reg.stderr b/src/test/ui/asm/x86_64/bad-reg.stderr
index f8b024e1acd62..84b8b5ec2850b 100644
--- a/src/test/ui/asm/x86_64/bad-reg.stderr
+++ b/src/test/ui/asm/x86_64/bad-reg.stderr
@@ -64,24 +64,24 @@ error: invalid register `ip`: the instruction pointer cannot be used as an opera
 LL |         asm!("", in("ip") foo);
    |                  ^^^^^^^^^^^^
 
-error: invalid register `k0`: the k0 AVX mask register cannot be used as an operand for inline asm
-  --> $DIR/bad-reg.rs:32:18
-   |
-LL |         asm!("", in("k0") foo);
-   |                  ^^^^^^^^^^^^
-
 error: register class `x87_reg` can only be used as a clobber, not as an input or output
-  --> $DIR/bad-reg.rs:35:18
+  --> $DIR/bad-reg.rs:33:18
    |
 LL |         asm!("", in("st(2)") foo);
    |                  ^^^^^^^^^^^^^^^
 
 error: register class `mmx_reg` can only be used as a clobber, not as an input or output
-  --> $DIR/bad-reg.rs:37:18
+  --> $DIR/bad-reg.rs:35:18
    |
 LL |         asm!("", in("mm0") foo);
    |                  ^^^^^^^^^^^^^
 
+error: register class `kreg0` can only be used as a clobber, not as an input or output
+  --> $DIR/bad-reg.rs:37:18
+   |
+LL |         asm!("", in("k0") foo);
+   |                  ^^^^^^^^^^^^
+
 error: register class `x87_reg` can only be used as a clobber, not as an input or output
   --> $DIR/bad-reg.rs:41:20
    |