diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index d9b0a15259465..5500541a125f4 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -686,7 +686,7 @@ pub fn codegen_crate(
     // This likely is a temporary measure. Once we don't have to support the
     // non-parallel compiler anymore, we can compile CGUs end-to-end in
     // parallel and get rid of the complicated scheduling logic.
-    let mut pre_compiled_cgus = if tcx.sess.threads() > 1 {
+    let mut pre_compiled_cgus = if rustc_data_structures::sync::is_dyn_thread_safe() {
         tcx.sess.time("compile_first_CGU_batch", || {
             // Try to find one CGU to compile per thread.
             let cgus: Vec<_> = cgu_reuse
diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
index d5b6a581a79f6..ef053fef5cdc2 100644
--- a/compiler/rustc_const_eval/src/interpret/memory.rs
+++ b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -121,6 +121,21 @@ pub struct AllocRef<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes = Box<[u8]>> {
     tcx: TyCtxt<'tcx>,
     alloc_id: AllocId,
 }
+
+// FIXME: More information in .
+unsafe impl<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes> Send
+    for AllocRef<'a, 'tcx, Prov, Extra, Bytes>
+where
+    Allocation<Prov, Extra, Bytes>: Sync,
+{
+}
+unsafe impl<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes> Sync
+    for AllocRef<'a, 'tcx, Prov, Extra, Bytes>
+where
+    Allocation<Prov, Extra, Bytes>: Sync,
+{
+}
+
 /// A reference to some allocation that was already bounds-checked for the given region
 /// and had the on-access machine hooks run.
 pub struct AllocRefMut<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes = Box<[u8]>> {
@@ -130,6 +145,16 @@ pub struct AllocRefMut<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes = Box<[u8]>> {
     alloc_id: AllocId,
 }
 
+// FIXME: More information in .
+impl<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes> !Send
+    for AllocRefMut<'a, 'tcx, Prov, Extra, Bytes>
+{
+}
+impl<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes> !Sync
+    for AllocRefMut<'a, 'tcx, Prov, Extra, Bytes>
+{
+}
+
 impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
     pub fn new() -> Self {
         Memory {
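Two different marker-impl shapes appear in memory.rs above: `AllocRef` opts *in* to `Send`/`Sync`, but only under a `where Allocation<...>: Sync` bound, while `AllocRefMut` opts *out* unconditionally via the nightly `negative_impls` feature (which the next file enables). A minimal nightly-only sketch of both patterns — the types here are illustrative stand-ins, not code from this PR:

    #![feature(negative_impls)] // nightly, as enabled in rustc_const_eval below

    // Opt out: a type that must stay on one thread, stated explicitly so the
    // guarantee survives later changes to its fields.
    struct MutToken(*mut u8);
    impl !Send for MutToken {}
    impl !Sync for MutToken {}

    // Conditional opt in: shareable across threads exactly when `T: Sync`.
    struct SharedRef<'a, T>(&'a T);
    unsafe impl<'a, T> Send for SharedRef<'a, T> where T: Sync {}
    unsafe impl<'a, T> Sync for SharedRef<'a, T> where T: Sync {}

    fn main() {
        let x = 42;
        let r = SharedRef(&x);
        std::thread::scope(|s| {
            s.spawn(move || assert_eq!(*r.0, 42)); // fine: i32 is Sync
        });
    }
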
diff --git a/compiler/rustc_const_eval/src/lib.rs b/compiler/rustc_const_eval/src/lib.rs
index c36282d5ed442..fbe7b51c5a5e1 100644
--- a/compiler/rustc_const_eval/src/lib.rs
+++ b/compiler/rustc_const_eval/src/lib.rs
@@ -20,6 +20,7 @@ Rust MIR: a lowered representation of Rust.
 #![feature(try_blocks)]
 #![feature(yeet_expr)]
 #![feature(if_let_guard)]
+#![feature(negative_impls)]
 #![recursion_limit = "256"]
 
 #[macro_use]
diff --git a/compiler/rustc_data_structures/Cargo.toml b/compiler/rustc_data_structures/Cargo.toml
index 78f73d193e380..afd354112bbf0 100644
--- a/compiler/rustc_data_structures/Cargo.toml
+++ b/compiler/rustc_data_structures/Cargo.toml
@@ -51,4 +51,4 @@ features = [
 memmap2 = "0.2.1"
 
 [features]
-rustc_use_parallel_compiler = ["indexmap/rustc-rayon", "rustc-rayon", "rustc-rayon-core"]
+rustc_use_parallel_compiler = ["indexmap/rustc-rayon", "rustc-rayon", "rustc-rayon-core"]
\ No newline at end of file
diff --git a/compiler/rustc_data_structures/src/sharded.rs b/compiler/rustc_data_structures/src/sharded.rs
index 7ed70ba1e0fc7..dc62934d9368a 100644
--- a/compiler/rustc_data_structures/src/sharded.rs
+++ b/compiler/rustc_data_structures/src/sharded.rs
@@ -1,69 +1,236 @@
 use crate::fx::{FxHashMap, FxHasher};
-use crate::sync::{CacheAligned, Lock, LockGuard};
+use crate::sync::{DynSync, LockLike};
+use parking_lot::{Mutex, MutexGuard};
 use std::borrow::Borrow;
+use std::cell::{RefCell, RefMut};
 use std::collections::hash_map::RawEntryMut;
 use std::hash::{Hash, Hasher};
 use std::mem;
 
-#[cfg(parallel_compiler)]
-// 32 shards is sufficient to reduce contention on an 8-core Ryzen 7 1700,
-// but this should be tested on higher core count CPUs. How the `Sharded` type gets used
-// may also affect the ideal number of shards.
-const SHARD_BITS: usize = 5;
+pub trait Shard {
+    type Impl<T>: ShardImpl<T>;
+}
 
-#[cfg(not(parallel_compiler))]
-const SHARD_BITS: usize = 0;
+pub trait ShardImpl<T> {
+    type Lock: LockLike<T>;
 
-pub const SHARDS: usize = 1 << SHARD_BITS;
+    fn new(value: impl FnMut() -> T) -> Self;
+
+    fn get_shard_by_value<K: Hash + ?Sized>(&self, _val: &K) -> &Self::Lock;
+
+    fn get_shard_by_hash(&self, _hash: u64) -> &Self::Lock;
+
+    fn lock_shards(&self) -> Vec<<Self::Lock as LockLike<T>>::LockGuard<'_>>;
+
+    fn try_lock_shards(&self) -> Option<Vec<<Self::Lock as LockLike<T>>::LockGuard<'_>>>;
+}
+
+#[derive(Default)]
+pub struct SingleShard;
+
+impl Shard for SingleShard {
+    type Impl<T> = SingleShardImpl<T>;
+}
 
 /// An array of cache-line aligned inner locked structures with convenience methods.
-pub struct Sharded<T> {
-    shards: [CacheAligned<Lock<T>>; SHARDS],
+pub struct SingleShardImpl<T> {
+    shard: RefCell<T>,
 }
 
-impl<T: Default> Default for Sharded<T> {
+impl<T: Default> Default for SingleShardImpl<T> {
+    #[inline]
+    fn default() -> Self {
+        Self { shard: RefCell::new(T::default()) }
+    }
+}
+
+impl<T> ShardImpl<T> for SingleShardImpl<T> {
+    type Lock = RefCell<T>;
+
+    #[inline]
+    fn new(mut value: impl FnMut() -> T) -> Self {
+        SingleShardImpl { shard: RefCell::new(value()) }
+    }
+
+    #[inline]
+    fn get_shard_by_value<K: Hash + ?Sized>(&self, _val: &K) -> &RefCell<T> {
+        &self.shard
+    }
+
+    #[inline]
+    fn get_shard_by_hash(&self, _hash: u64) -> &RefCell<T> {
+        &self.shard
+    }
+
+    fn lock_shards(&self) -> Vec<RefMut<'_, T>> {
+        vec![self.shard.lock()]
+    }
+
+    fn try_lock_shards(&self) -> Option<Vec<RefMut<'_, T>>> {
+        Some(vec![self.shard.try_lock()?])
+    }
+}
+
+const SHARD_BITS: usize = 5;
+
+pub const SHARDS: usize = 1 << SHARD_BITS;
+
+#[derive(Default)]
+pub struct Sharded;
+
+impl Shard for Sharded {
+    type Impl<T> = ShardedImpl<T>;
+}
+
+#[derive(Default)]
+#[repr(align(64))]
+pub struct CacheAligned<T>(pub T);
+
+pub struct ShardedImpl<T> {
+    shards: [CacheAligned<Mutex<T>>; SHARDS],
+}
+
+impl<T: Default> Default for ShardedImpl<T> {
     #[inline]
     fn default() -> Self {
         Self::new(T::default)
     }
 }
 
-impl<T> Sharded<T> {
+impl<T> ShardImpl<T> for ShardedImpl<T> {
+    type Lock = Mutex<T>;
+
     #[inline]
-    pub fn new(mut value: impl FnMut() -> T) -> Self {
-        Sharded { shards: [(); SHARDS].map(|()| CacheAligned(Lock::new(value()))) }
+    fn new(mut value: impl FnMut() -> T) -> Self {
+        ShardedImpl { shards: [(); SHARDS].map(|()| CacheAligned(Mutex::new(value()))) }
     }
 
     /// The shard is selected by hashing `val` with `FxHasher`.
     #[inline]
-    pub fn get_shard_by_value<K: Hash + ?Sized>(&self, val: &K) -> &Lock<T> {
-        if SHARDS == 1 { &self.shards[0].0 } else { self.get_shard_by_hash(make_hash(val)) }
+    fn get_shard_by_value<K: Hash + ?Sized>(&self, val: &K) -> &Mutex<T> {
+        self.get_shard_by_hash(make_hash(val))
     }
 
     #[inline]
-    pub fn get_shard_by_hash(&self, hash: u64) -> &Lock<T> {
+    fn get_shard_by_hash(&self, hash: u64) -> &Mutex<T> {
         &self.shards[get_shard_index_by_hash(hash)].0
     }
 
+    fn lock_shards(&self) -> Vec<MutexGuard<'_, T>> {
+        (0..SHARDS).map(|i| self.shards[i].0.lock()).collect()
+    }
+
+    fn try_lock_shards(&self) -> Option<Vec<MutexGuard<'_, T>>> {
+        (0..SHARDS).map(|i| self.shards[i].0.try_lock()).collect()
+    }
+}
+
+pub struct DynSharded<T> {
+    single_thread: bool,
+    single_shard: RefCell<T>,
+    parallel_shard: ShardedImpl<T>,
+}
+
+#[cfg(parallel_compiler)]
+unsafe impl<T> DynSync for DynSharded<T> {}
+
+impl<T: Default> Default for DynSharded<T> {
     #[inline]
-    pub fn get_shard_by_index(&self, i: usize) -> &Lock<T> {
-        &self.shards[i].0
+    fn default() -> Self {
+        let single_thread = !crate::sync::is_dyn_thread_safe();
+        DynSharded {
+            single_thread,
+            single_shard: RefCell::new(T::default()),
+            parallel_shard: ShardedImpl::default(),
+        }
     }
+}
 
-    pub fn lock_shards(&self) -> Vec<LockGuard<'_, T>> {
-        (0..SHARDS).map(|i| self.shards[i].0.lock()).collect()
+impl<T: Default> DynSharded<T> {
+    pub fn new(mut value: impl FnMut() -> T) -> Self {
+        if !crate::sync::is_dyn_thread_safe() {
+            DynSharded {
+                single_thread: true,
+                single_shard: RefCell::new(value()),
+                parallel_shard: ShardedImpl::default(),
+            }
+        } else {
+            DynSharded {
+                single_thread: false,
+                single_shard: RefCell::new(T::default()),
+                parallel_shard: ShardedImpl::new(value),
+            }
+        }
     }
 
-    pub fn try_lock_shards(&self) -> Option<Vec<LockGuard<'_, T>>> {
-        (0..SHARDS).map(|i| self.shards[i].0.try_lock()).collect()
+    /// The shard is selected by hashing `val` with `FxHasher`.
+    #[inline]
+    pub fn with_get_shard_by_value<K: Hash + ?Sized, F: FnOnce(&mut T) -> R, R>(
+        &self,
+        val: &K,
+        f: F,
+    ) -> R {
+        if self.single_thread {
+            let mut lock = self.single_shard.borrow_mut();
+            f(&mut *lock)
+        } else {
+            let mut lock = self.parallel_shard.get_shard_by_value(val).lock();
+            f(&mut *lock)
+        }
+    }
+
+    #[inline]
+    pub fn with_get_shard_by_hash<F: FnOnce(&mut T) -> R, R>(&self, hash: u64, f: F) -> R {
+        if self.single_thread {
+            let mut lock = self.single_shard.borrow_mut();
+            f(&mut *lock)
+        } else {
+            let mut lock = self.parallel_shard.get_shard_by_hash(hash).lock();
+            f(&mut *lock)
+        }
+    }
+
+    #[inline]
+    pub fn with_lock_shards<F: FnMut(&mut T) -> R, R>(&self, mut f: F) -> Vec<R> {
+        if self.single_thread {
+            let mut lock = self.single_shard.borrow_mut();
+            vec![f(&mut *lock)]
+        } else {
+            (0..SHARDS).map(|i| f(&mut *self.parallel_shard.shards[i].0.lock())).collect()
+        }
+    }
+
+    #[inline]
+    pub fn with_try_lock_shards<F: FnMut(&mut T) -> R, R>(&self, mut f: F) -> Option<Vec<R>> {
+        if self.single_thread {
+            let mut lock = self.single_shard.try_borrow_mut().ok()?;
+            Some(vec![f(&mut *lock)])
+        } else {
+            (0..SHARDS)
+                .map(|i| {
+                    let mut shard = self.parallel_shard.shards[i].0.try_lock()?;
+                    Some(f(&mut *shard))
+                })
+                .collect()
+        }
+    }
+
+    #[inline]
+    pub fn get_lock_by_value<K: Hash + ?Sized>(&self, val: &K) -> &Mutex<T> {
+        self.parallel_shard.get_shard_by_value(val)
+    }
+
+    #[inline]
+    pub fn get_borrow_by_value<K: Hash + ?Sized>(&self, _val: &K) -> &RefCell<T> {
+        &self.single_shard
     }
 }
 
-pub type ShardedHashMap<K, V> = Sharded<FxHashMap<K, V>>;
+pub type ShardedHashMap<K, V> = DynSharded<FxHashMap<K, V>>;
 
 impl<K: Eq, V> ShardedHashMap<K, V> {
     pub fn len(&self) -> usize {
-        self.lock_shards().iter().map(|shard| shard.len()).sum()
+        self.with_lock_shards(|shard| shard.len()).into_iter().sum()
     }
 }
 
@@ -75,17 +242,18 @@ impl<K: Eq + Hash + Copy> ShardedHashMap<K, ()> {
         Q: Hash + Eq,
     {
         let hash = make_hash(value);
-        let mut shard = self.get_shard_by_hash(hash).lock();
-        let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, value);
-
-        match entry {
-            RawEntryMut::Occupied(e) => *e.key(),
-            RawEntryMut::Vacant(e) => {
-                let v = make();
-                e.insert_hashed_nocheck(hash, v, ());
-                v
+        self.with_get_shard_by_hash(hash, |shard| {
+            let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, value);
+
+            match entry {
+                RawEntryMut::Occupied(e) => *e.key(),
+                RawEntryMut::Vacant(e) => {
+                    let v = make();
+                    e.insert_hashed_nocheck(hash, v, ());
+                    v
+                }
             }
-        }
+        })
     }
 
     #[inline]
@@ -95,17 +263,18 @@ impl<K: Eq + Hash + Copy> ShardedHashMap<K, ()> {
         Q: Hash + Eq,
     {
         let hash = make_hash(&value);
-        let mut shard = self.get_shard_by_hash(hash).lock();
-        let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, &value);
-
-        match entry {
-            RawEntryMut::Occupied(e) => *e.key(),
-            RawEntryMut::Vacant(e) => {
-                let v = make(value);
-                e.insert_hashed_nocheck(hash, v, ());
-                v
+        self.with_get_shard_by_hash(hash, |shard| {
+            let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, &value);
+
+            match entry {
+                RawEntryMut::Occupied(e) => *e.key(),
+                RawEntryMut::Vacant(e) => {
+                    let v = make(value);
+                    e.insert_hashed_nocheck(hash, v, ());
+                    v
+                }
             }
-        }
+        })
     }
 }
 
@@ -117,9 +286,10 @@ pub trait IntoPointer {
 impl<K: Eq + Hash + Copy + IntoPointer> ShardedHashMap<K, ()> {
     pub fn contains_pointer_to<T: Hash + IntoPointer>(&self, value: &T) -> bool {
         let hash = make_hash(&value);
-        let shard = self.get_shard_by_hash(hash).lock();
-        let value = value.into_pointer();
-        shard.raw_entry().from_hash(hash, |entry| entry.into_pointer() == value).is_some()
+        self.with_get_shard_by_hash(hash, |shard| {
+            let value = value.into_pointer();
+            shard.raw_entry().from_hash(hash, |entry| entry.into_pointer() == value).is_some()
+        })
     }
 }
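A note on the design above: `RefCell` and `Mutex` hand out incompatible guard types, so `DynSharded` exposes closure-taking `with_*` methods instead of returning a guard — the branch on `single_thread` never escapes the function. A standalone sketch of the same dispatch, with a hypothetical `DynShard` type and `parking_lot` as the only dependency (this is the pattern, not the PR's API):

    use parking_lot::Mutex;
    use std::cell::RefCell;

    /// One logical shard, two locking strategies chosen once at startup.
    pub struct DynShard<T> {
        single_thread: bool,
        cell: RefCell<T>, // single-thread mode: a borrow flag, no atomics
        mutex: Mutex<T>,  // multi-thread mode: a real lock
    }

    impl<T: Default> DynShard<T> {
        pub fn new(single_thread: bool, value: T) -> Self {
            let (cell, mutex) = if single_thread {
                (RefCell::new(value), Mutex::new(T::default()))
            } else {
                (RefCell::new(T::default()), Mutex::new(value))
            };
            DynShard { single_thread, cell, mutex }
        }

        /// Run `f` under whichever lock is active; no guard type escapes.
        pub fn with_lock<R>(&self, f: impl FnOnce(&mut T) -> R) -> R {
            if self.single_thread {
                f(&mut *self.cell.borrow_mut())
            } else {
                f(&mut *self.mutex.lock())
            }
        }
    }

    fn main() {
        let shard = DynShard::new(true, 0u32);
        shard.with_lock(|v| *v += 1);
        assert_eq!(shard.with_lock(|v| *v), 1);
    }

The `with_lock_shards`/`with_try_lock_shards` pairs above follow the same shape, just mapping the closure over all 32 shards in the parallel case.
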
diff --git a/compiler/rustc_data_structures/src/sync.rs b/compiler/rustc_data_structures/src/sync.rs
index 6c3197d8ec2c5..f0e41873830dc 100644
--- a/compiler/rustc_data_structures/src/sync.rs
+++ b/compiler/rustc_data_structures/src/sync.rs
@@ -40,13 +40,16 @@
 //! [^2] `MTLockRef` is a typedef.
 
 pub use crate::marker::*;
+use std::cell::{Cell, RefCell, RefMut, UnsafeCell};
 use std::collections::HashMap;
+use std::fmt::{Debug, Formatter};
 use std::hash::{BuildHasher, Hash};
+use std::intrinsics::likely;
+use std::marker::PhantomData;
 use std::ops::{Deref, DerefMut};
 use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe};
 
 mod worker_local;
-pub use worker_local::{Registry, WorkerLocal};
 
 pub use std::sync::atomic::Ordering;
 pub use std::sync::atomic::Ordering::SeqCst;
@@ -54,16 +57,18 @@ pub use std::sync::atomic::Ordering::SeqCst;
 pub use vec::{AppendOnlyIndexVec, AppendOnlyVec};
 
 mod vec;
+use parking_lot::lock_api::RawMutex as _;
+use parking_lot::lock_api::RawRwLock as _;
+use parking_lot::{Mutex, MutexGuard, RawMutex, RawRwLock};
 
 mod mode {
     use super::Ordering;
     use std::sync::atomic::AtomicU8;
 
-    const UNINITIALIZED: u8 = 0;
     const DYN_NOT_THREAD_SAFE: u8 = 1;
     const DYN_THREAD_SAFE: u8 = 2;
 
-    static DYN_THREAD_SAFE_MODE: AtomicU8 = AtomicU8::new(UNINITIALIZED);
+    static DYN_THREAD_SAFE_MODE: AtomicU8 = AtomicU8::new(DYN_NOT_THREAD_SAFE);
 
     // Whether thread safety is enabled (due to running under multiple threads).
     #[inline]
@@ -78,15 +83,9 @@ mod mode {
     // Only set by the `-Z threads` compile option
     pub fn set_dyn_thread_safe_mode(mode: bool) {
         let set: u8 = if mode { DYN_THREAD_SAFE } else { DYN_NOT_THREAD_SAFE };
-        let previous = DYN_THREAD_SAFE_MODE.compare_exchange(
-            UNINITIALIZED,
-            set,
-            Ordering::Relaxed,
-            Ordering::Relaxed,
-        );
 
-        // Check that the mode was either uninitialized or was already set to the requested mode.
-        assert!(previous.is_ok() || previous == Err(set));
+        // just for speed testing
+        DYN_THREAD_SAFE_MODE.store(set, Ordering::Relaxed);
     }
 }
@@ -246,20 +245,9 @@ cfg_if! {
         pub use std::rc::Rc as Lrc;
         pub use std::rc::Weak as Weak;
 
-        pub use std::cell::Ref as ReadGuard;
-        pub use std::cell::Ref as MappedReadGuard;
-        pub use std::cell::RefMut as WriteGuard;
-        pub use std::cell::RefMut as MappedWriteGuard;
-        pub use std::cell::RefMut as LockGuard;
-        pub use std::cell::RefMut as MappedLockGuard;
-
         pub use std::cell::OnceCell;
 
-        use std::cell::RefCell as InnerRwLock;
-        use std::cell::RefCell as InnerLock;
-
-        use std::cell::Cell;
-
         pub type MTLockRef<'a, T> = &'a mut MTLock<T>;
 
         #[derive(Debug, Default)]
@@ -303,14 +291,6 @@ cfg_if! {
         pub use std::marker::Send as Send;
         pub use std::marker::Sync as Sync;
 
-        pub use parking_lot::RwLockReadGuard as ReadGuard;
-        pub use parking_lot::MappedRwLockReadGuard as MappedReadGuard;
-        pub use parking_lot::RwLockWriteGuard as WriteGuard;
-        pub use parking_lot::MappedRwLockWriteGuard as MappedWriteGuard;
-
-        pub use parking_lot::MutexGuard as LockGuard;
-        pub use parking_lot::MappedMutexGuard as MappedLockGuard;
-
         pub use std::sync::OnceLock as OnceCell;
 
         pub use std::sync::atomic::{AtomicBool, AtomicUsize, AtomicU32, AtomicU64};
@@ -350,11 +330,6 @@ cfg_if! {
             }
         }
 
-        use parking_lot::Mutex as InnerLock;
-        use parking_lot::RwLock as InnerRwLock;
-
-        use std::thread;
-
         #[inline]
         pub fn join<A, B, RA: DynSend, RB: DynSend>(oper_a: A, oper_b: B) -> (RA, RB)
             where
@@ -436,7 +411,7 @@ cfg_if! {
         ) {
             if mode::is_dyn_thread_safe() {
                 let for_each = FromDyn::from(for_each);
-                let panic: Lock<Option<_>> = Lock::new(None);
+                let panic: Mutex<Option<_>> = Mutex::new(None);
                 t.into_par_iter().for_each(|i| if let Err(p) = catch_unwind(AssertUnwindSafe(|| for_each(i))) {
                     let mut l = panic.lock();
                     if l.is_none() {
@@ -474,7 +449,7 @@ cfg_if! {
             map: impl Fn(I) -> R + DynSync + DynSend
         ) -> C {
             if mode::is_dyn_thread_safe() {
-                let panic: Lock<Option<_>> = Lock::new(None);
+                let panic: Mutex<Option<_>> = Mutex::new(None);
                 let map = FromDyn::from(map);
                 // We catch panics here ensuring that all the loop iterations execute.
                 let r = t.into_par_iter().filter_map(|i| {
@@ -514,14 +489,10 @@ cfg_if! {
                 r
             }
         }
-
-        /// This makes locks panic if they are already held.
-        /// It is only useful when you are running in a single thread
-        const ERROR_CHECKING: bool = false;
     }
 }
 
-#[derive(Default)]
+#[derive(Default, Debug)]
 #[cfg_attr(parallel_compiler, repr(align(64)))]
 pub struct CacheAligned<T>(pub T);
 
@@ -537,53 +508,89 @@ impl<K: Eq + Hash, V: Eq, S: BuildHasher> HashMapExt<K, V> for HashMap<K, V, S>
     }
 }
 
-#[derive(Debug)]
-pub struct Lock<T>(InnerLock<T>);
+/// This makes locks panic if they are already held.
+/// It is only useful when you are running in a single thread
+// const ERROR_CHECKING: bool = false;
+
+pub struct Lock<T> {
+    single_thread: bool,
+    pub(crate) data: UnsafeCell<T>,
+    pub(crate) borrow: Cell<bool>,
+    mutex: RawMutex,
+}
+
+impl<T: Debug> Debug for Lock<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self.try_lock() {
+            Some(guard) => f.debug_struct("Lock").field("data", guard.deref()).finish(),
+            None => {
+                struct LockedPlaceholder;
+                impl Debug for LockedPlaceholder {
+                    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+                        f.write_str("<locked>")
+                    }
+                }
+
+                f.debug_struct("Lock").field("data", &LockedPlaceholder).finish()
+            }
+        }
+    }
+}
 
 impl<T> Lock<T> {
-    #[inline(always)]
-    pub fn new(inner: T) -> Self {
-        Lock(InnerLock::new(inner))
+    #[inline]
+    pub fn new(val: T) -> Self {
+        Lock {
+            single_thread: !is_dyn_thread_safe(),
+            data: UnsafeCell::new(val),
+            borrow: Cell::new(false),
+            mutex: RawMutex::INIT,
+        }
     }
 
-    #[inline(always)]
+    #[inline]
     pub fn into_inner(self) -> T {
-        self.0.into_inner()
+        self.data.into_inner()
     }
 
-    #[inline(always)]
+    #[inline]
     pub fn get_mut(&mut self) -> &mut T {
-        self.0.get_mut()
-    }
-
-    #[cfg(parallel_compiler)]
-    #[inline(always)]
-    pub fn try_lock(&self) -> Option<LockGuard<'_, T>> {
-        self.0.try_lock()
+        self.data.get_mut()
     }
 
-    #[cfg(not(parallel_compiler))]
-    #[inline(always)]
+    #[inline]
     pub fn try_lock(&self) -> Option<LockGuard<'_, T>> {
-        self.0.try_borrow_mut().ok()
+        // SAFETY: the `&mut T` is accessible as long as self exists.
+        if likely(self.single_thread) {
+            if self.borrow.get() {
+                None
+            } else {
+                self.borrow.set(true);
+                Some(LockGuard { lock: &self, marker: PhantomData })
+            }
+        } else {
+            if !self.mutex.try_lock() {
+                None
+            } else {
+                Some(LockGuard { lock: &self, marker: PhantomData })
+            }
+        }
     }
 
-    #[cfg(parallel_compiler)]
-    #[inline(always)]
-    #[track_caller]
-    pub fn lock(&self) -> LockGuard<'_, T> {
-        if ERROR_CHECKING {
-            self.0.try_lock().expect("lock was already held")
+    #[inline]
+    fn lock_raw(&self) {
+        if likely(self.single_thread) {
+            assert!(!self.borrow.replace(true));
         } else {
-            self.0.lock()
+            self.mutex.lock();
         }
     }
 
-    #[cfg(not(parallel_compiler))]
     #[inline(always)]
     #[track_caller]
     pub fn lock(&self) -> LockGuard<'_, T> {
-        self.0.borrow_mut()
+        self.lock_raw();
+        LockGuard { lock: &self, marker: PhantomData }
     }
 
     #[inline(always)]
@@ -612,81 +619,463 @@ impl<T: Default> Default for Lock<T> {
     }
 }
 
-#[derive(Debug, Default)]
-pub struct RwLock<T>(InnerRwLock<T>);
+pub struct LockGuard<'a, T> {
+    lock: &'a Lock<T>,
+    marker: PhantomData<&'a mut T>,
+}
+
+impl<T> Deref for LockGuard<'_, T> {
+    type Target = T;
+
+    #[inline(always)]
+    fn deref(&self) -> &T {
+        unsafe { &*self.lock.data.get() }
+    }
+}
+
+impl<T> DerefMut for LockGuard<'_, T> {
+    #[inline(always)]
+    fn deref_mut(&mut self) -> &mut T {
+        unsafe { &mut *self.lock.data.get() }
+    }
+}
+
+#[inline]
+fn unlock_mt<T>(guard: &mut LockGuard<'_, T>) {
+    unsafe { guard.lock.mutex.unlock() }
+}
+
+impl<'a, T> Drop for LockGuard<'a, T> {
+    #[inline]
+    fn drop(&mut self) {
+        if likely(self.lock.single_thread) {
+            debug_assert!(self.lock.borrow.get());
+            self.lock.borrow.set(false);
+        } else {
+            unlock_mt(self)
+        }
+    }
+}
+
+pub trait LockLike<T> {
+    type LockGuard<'a>: DerefMut<Target = T>
+    where
+        Self: 'a;
+
+    fn new(val: T) -> Self;
+
+    fn into_inner(self) -> T;
+
+    fn get_mut(&mut self) -> &mut T;
+
+    fn try_lock(&self) -> Option<Self::LockGuard<'_>>;
+
+    fn lock(&self) -> Self::LockGuard<'_>;
+}
+
+impl<T> LockLike<T> for RefCell<T> {
+    type LockGuard<'a> = RefMut<'a, T> where T: 'a;
+
+    #[inline]
+    fn new(val: T) -> Self {
+        RefCell::new(val)
+    }
+
+    #[inline]
+    fn into_inner(self) -> T {
+        self.into_inner()
+    }
+
+    #[inline]
+    fn get_mut(&mut self) -> &mut T {
+        self.get_mut()
+    }
+
+    #[inline]
+    fn try_lock(&self) -> Option<RefMut<'_, T>> {
+        self.try_borrow_mut().ok()
+    }
+
+    #[inline(always)]
+    #[track_caller]
+    fn lock(&self) -> RefMut<'_, T> {
+        self.borrow_mut()
+    }
+}
+
+impl<T> LockLike<T> for Mutex<T> {
+    type LockGuard<'a> = MutexGuard<'a, T> where T: 'a;
+
+    #[inline]
+    fn new(val: T) -> Self {
+        Mutex::new(val)
+    }
+
+    #[inline]
+    fn into_inner(self) -> T {
+        self.into_inner()
+    }
+
+    #[inline]
+    fn get_mut(&mut self) -> &mut T {
+        self.get_mut()
+    }
+
+    #[inline]
+    fn try_lock(&self) -> Option<MutexGuard<'_, T>> {
+        self.try_lock()
+    }
+
+    #[inline(always)]
+    #[track_caller]
+    fn lock(&self) -> MutexGuard<'_, T> {
+        self.lock()
+    }
+}
+
+pub struct MappedReadGuard<'a, T: ?Sized> {
+    raw: &'a RwLockRaw,
+    data: *const T,
+    marker: PhantomData<&'a T>,
+}
+
+unsafe impl<T: ?Sized> std::marker::Send for MappedReadGuard<'_, T> {}
+unsafe impl<T: ?Sized> std::marker::Sync for MappedReadGuard<'_, T> {}
+
+impl<'a, T: 'a + ?Sized> MappedReadGuard<'a, T> {
+    #[inline]
+    pub fn map<U: ?Sized, F>(s: Self, f: F) -> MappedReadGuard<'a, U>
+    where
+        F: FnOnce(&T) -> &U,
+    {
+        let raw = s.raw;
+        let data = f(unsafe { &*s.data });
+        std::mem::forget(s);
+        MappedReadGuard { raw, data, marker: PhantomData }
+    }
+}
+
+impl<'a, T: 'a + ?Sized> Deref for MappedReadGuard<'a, T> {
+    type Target = T;
+    #[inline]
+    fn deref(&self) -> &T {
+        unsafe { &*self.data }
+    }
+}
+
+impl<'a, T: 'a + ?Sized> Drop for MappedReadGuard<'a, T> {
+    #[inline]
+    fn drop(&mut self) {
+        if likely(self.raw.single_thread) {
+            let i = self.raw.borrow.get();
+            debug_assert!(i > 0);
+            self.raw.borrow.set(i - 1);
+        } else {
+            // Safety: A MappedReadGuard always holds a shared lock.
+            unsafe {
+                self.raw.raw.unlock_shared();
+            }
+        }
+    }
+}
+
+pub struct MappedWriteGuard<'a, T: ?Sized> {
+    raw: &'a RwLockRaw,
+    data: *mut T,
+    marker: PhantomData<&'a mut T>,
+}
+
+unsafe impl<T: ?Sized> std::marker::Send for MappedWriteGuard<'_, T> {}
+
+impl<'a, T: 'a + ?Sized> MappedWriteGuard<'a, T> {
+    #[inline]
+    pub fn map<U: ?Sized, F>(s: Self, f: F) -> MappedWriteGuard<'a, U>
+    where
+        F: FnOnce(&mut T) -> &mut U,
+    {
+        let raw = s.raw;
+        let data = f(unsafe { &mut *s.data });
+        std::mem::forget(s);
+        MappedWriteGuard { raw, data, marker: PhantomData }
+    }
+}
+
+impl<'a, T: 'a + ?Sized> Deref for MappedWriteGuard<'a, T> {
+    type Target = T;
+    #[inline]
+    fn deref(&self) -> &T {
+        unsafe { &*self.data }
+    }
+}
+
+impl<'a, T: 'a + ?Sized> DerefMut for MappedWriteGuard<'a, T> {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut T {
+        unsafe { &mut *self.data }
+    }
+}
+
+impl<'a, T: 'a + ?Sized> Drop for MappedWriteGuard<'a, T> {
+    #[inline]
+    fn drop(&mut self) {
+        if likely(self.raw.single_thread) {
+            assert_eq!(self.raw.borrow.replace(0), -1);
+        } else {
+            // Safety: A MappedWriteGuard always holds an exclusive lock.
+            unsafe {
+                self.raw.raw.unlock_exclusive();
+            }
+        }
+    }
+}
+
+pub struct ReadGuard<'a, T> {
+    rwlock: &'a RwLock<T>,
+    marker: PhantomData<&'a T>,
+}
+
+impl<'a, T: 'a> ReadGuard<'a, T> {
+    pub fn map<U: ?Sized, F>(s: Self, f: F) -> MappedReadGuard<'a, U>
+    where
+        F: FnOnce(&T) -> &U,
+    {
+        let raw = &s.rwlock.raw;
+        let data = f(unsafe { &*s.rwlock.data.get() });
+        std::mem::forget(s);
+        MappedReadGuard { raw, data, marker: PhantomData }
+    }
+}
+
+impl<'a, T: 'a> Deref for ReadGuard<'a, T> {
+    type Target = T;
+    #[inline]
+    fn deref(&self) -> &T {
+        unsafe { &*self.rwlock.data.get() }
+    }
+}
+
+impl<'a, T: 'a> Drop for ReadGuard<'a, T> {
+    #[inline]
+    fn drop(&mut self) {
+        if likely(self.rwlock.raw.single_thread) {
+            let i = self.rwlock.raw.borrow.get();
+            debug_assert!(i > 0);
+            self.rwlock.raw.borrow.set(i - 1);
+        } else {
+            // Safety: A ReadGuard always holds a shared lock.
+            unsafe {
+                self.rwlock.raw.raw.unlock_shared();
+            }
+        }
+    }
+}
+
+pub struct WriteGuard<'a, T> {
+    rwlock: &'a RwLock<T>,
+    marker: PhantomData<&'a mut T>,
+}
+
+impl<'a, T: 'a> WriteGuard<'a, T> {
+    pub fn map<U: ?Sized, F>(s: Self, f: F) -> MappedWriteGuard<'a, U>
+    where
+        F: FnOnce(&mut T) -> &mut U,
+    {
+        let raw = &s.rwlock.raw;
+        let data = f(unsafe { &mut *s.rwlock.data.get() });
+        std::mem::forget(s);
+        MappedWriteGuard { raw, data, marker: PhantomData }
+    }
+}
+
+impl<'a, T: 'a> Deref for WriteGuard<'a, T> {
+    type Target = T;
+    #[inline]
+    fn deref(&self) -> &T {
+        unsafe { &*self.rwlock.data.get() }
+    }
+}
+
+impl<'a, T: 'a> DerefMut for WriteGuard<'a, T> {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut T {
+        unsafe { &mut *self.rwlock.data.get() }
+    }
+}
+
+impl<'a, T: 'a> Drop for WriteGuard<'a, T> {
+    #[inline]
+    fn drop(&mut self) {
+        if likely(self.rwlock.raw.single_thread) {
+            assert_eq!(self.rwlock.raw.borrow.replace(0), -1);
+        } else {
+            // Safety: A WriteGuard always holds an exclusive lock.
+            unsafe {
+                self.rwlock.raw.raw.unlock_exclusive();
+            }
+        }
+    }
+}
+
+struct RwLockRaw {
+    single_thread: bool,
+    borrow: Cell<isize>,
+    raw: RawRwLock,
+}
+
+pub struct RwLock<T> {
+    raw: RwLockRaw,
+    data: UnsafeCell<T>,
+}
+
+// just for speed testing (unconditional, so not sound in general)
+unsafe impl<T> std::marker::Send for RwLock<T> {}
+unsafe impl<T> std::marker::Sync for RwLock<T> {}
+
+impl<T: Debug> Debug for RwLock<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("RwLock").field("data", self.read().deref()).finish()
+    }
+}
+
+impl<T: Default> Default for RwLock<T> {
+    fn default() -> Self {
+        RwLock {
+            raw: RwLockRaw {
+                single_thread: !is_dyn_thread_safe(),
+                borrow: Cell::new(0),
+                raw: RawRwLock::INIT,
+            },
+
+            data: UnsafeCell::new(T::default()),
+        }
+    }
+}
 
 impl<T> RwLock<T> {
     #[inline(always)]
     pub fn new(inner: T) -> Self {
-        RwLock(InnerRwLock::new(inner))
+        RwLock {
+            raw: RwLockRaw {
+                single_thread: !is_dyn_thread_safe(),
+                borrow: Cell::new(0),
+                raw: RawRwLock::INIT,
+            },
+
+            data: UnsafeCell::new(inner),
+        }
     }
 
     #[inline(always)]
     pub fn into_inner(self) -> T {
-        self.0.into_inner()
+        self.data.into_inner()
     }
 
     #[inline(always)]
     pub fn get_mut(&mut self) -> &mut T {
-        self.0.get_mut()
+        self.data.get_mut()
     }
 
-    #[cfg(not(parallel_compiler))]
-    #[inline(always)]
-    #[track_caller]
-    pub fn read(&self) -> ReadGuard<'_, T> {
-        self.0.borrow()
+    #[inline]
+    fn mt_read(&self) -> ReadGuard<'_, T> {
+        self.raw.raw.lock_shared();
+        ReadGuard { rwlock: self, marker: PhantomData }
     }
 
-    #[cfg(parallel_compiler)]
     #[inline(always)]
     pub fn read(&self) -> ReadGuard<'_, T> {
-        if ERROR_CHECKING {
-            self.0.try_read().expect("lock was already held")
+        if likely(self.raw.single_thread) {
+            let b = self.raw.borrow.get();
+            assert!(b >= 0);
+            self.raw.borrow.set(b + 1);
+            ReadGuard { rwlock: self, marker: PhantomData }
         } else {
-            self.0.read()
+            self.mt_read()
+        }
+    }
+
+    #[inline]
+    fn with_mt_read_lock<F: FnOnce(&T) -> R, R>(&self, f: F) -> R {
+        self.raw.raw.lock_shared();
+        let r = unsafe { f(&*self.data.get()) };
+        unsafe {
+            self.raw.raw.unlock_shared();
         }
+        r
     }
 
     #[inline(always)]
     #[track_caller]
     pub fn with_read_lock<F: FnOnce(&T) -> R, R>(&self, f: F) -> R {
-        f(&*self.read())
+        if likely(self.raw.single_thread) {
+            let b = self.raw.borrow.get();
+            assert!(b >= 0);
+            self.raw.borrow.set(b + 1);
+            let r = unsafe { f(&*self.data.get()) };
+            self.raw.borrow.set(b);
+            r
+        } else {
+            self.with_mt_read_lock(f)
+        }
     }
 
-    #[cfg(not(parallel_compiler))]
     #[inline(always)]
     pub fn try_write(&self) -> Result<WriteGuard<'_, T>, ()> {
-        self.0.try_borrow_mut().map_err(|_| ())
+        if likely(self.raw.single_thread) {
+            let b = self.raw.borrow.get();
+            if b != 0 {
+                Err(())
+            } else {
+                self.raw.borrow.set(-1);
+                Ok(WriteGuard { rwlock: self, marker: PhantomData })
+            }
+        } else {
+            if self.raw.raw.try_lock_exclusive() {
+                Ok(WriteGuard { rwlock: self, marker: PhantomData })
+            } else {
+                Err(())
+            }
+        }
     }
 
-    #[cfg(parallel_compiler)]
-    #[inline(always)]
-    pub fn try_write(&self) -> Result<WriteGuard<'_, T>, ()> {
-        self.0.try_write().ok_or(())
+    #[inline]
+    fn mt_write(&self) -> WriteGuard<'_, T> {
+        self.raw.raw.lock_exclusive();
+        WriteGuard { rwlock: self, marker: PhantomData }
     }
 
-    #[cfg(not(parallel_compiler))]
     #[inline(always)]
-    #[track_caller]
     pub fn write(&self) -> WriteGuard<'_, T> {
-        self.0.borrow_mut()
+        if likely(self.raw.single_thread) {
+            assert_eq!(self.raw.borrow.replace(-1), 0);
+            WriteGuard { rwlock: self, marker: PhantomData }
+        } else {
+            self.mt_write()
+        }
     }
 
-    #[cfg(parallel_compiler)]
-    #[inline(always)]
-    pub fn write(&self) -> WriteGuard<'_, T> {
-        if ERROR_CHECKING {
-            self.0.try_write().expect("lock was already held")
-        } else {
-            self.0.write()
+    #[inline]
+    pub fn with_mt_write_lock<F: FnOnce(&mut T) -> R, R>(&self, f: F) -> R {
+        self.raw.raw.lock_exclusive();
+        unsafe {
+            let r = f(&mut *self.data.get());
+            self.raw.raw.unlock_exclusive();
+            r
         }
     }
 
     #[inline(always)]
     #[track_caller]
     pub fn with_write_lock<F: FnOnce(&mut T) -> R, R>(&self, f: F) -> R {
-        f(&mut *self.write())
+        if likely(self.raw.single_thread) {
+            // A write lock is exclusive: flip the borrow flag from 0 to -1
+            // for the duration of the closure (mirroring `write`), rather
+            // than incrementing it like a shared borrow.
+            assert_eq!(self.raw.borrow.replace(-1), 0);
+            let r = unsafe { f(&mut *self.data.get()) };
+            self.raw.borrow.set(0);
+            r
+        } else {
+            self.with_mt_write_lock(f)
+        }
     }
 
     #[inline(always)]
@@ -701,13 +1090,6 @@ impl<T> RwLock<T> {
         self.write()
     }
 
-    #[cfg(not(parallel_compiler))]
-    #[inline(always)]
-    pub fn leak(&self) -> &T {
-        ReadGuard::leak(self.read())
-    }
-
-    #[cfg(parallel_compiler)]
     #[inline(always)]
     pub fn leak(&self) -> &T {
         let guard = self.read();
@@ -725,34 +1107,64 @@ impl<T: Clone> Clone for RwLock<T> {
     }
 }
 
+#[derive(Debug)]
+pub struct WorkerLocal<T> {
+    single_thread: bool,
+    inner: Option<T>,
+    mt_inner: Option<worker_local::WorkerLocal<T>>,
+}
+
+impl<T> WorkerLocal<T> {
+    /// Creates a new worker local where the `initial` closure computes the
+    /// value this worker local should take for each thread in the thread pool.
+    #[inline]
+    pub fn new<F: FnMut(usize) -> T>(mut f: F) -> WorkerLocal<T> {
+        if !is_dyn_thread_safe() {
+            WorkerLocal { single_thread: true, inner: Some(f(0)), mt_inner: None }
+        } else {
+            WorkerLocal {
+                single_thread: false,
+                inner: None,
+                mt_inner: Some(worker_local::WorkerLocal::new(f)),
+            }
+        }
+    }
+}
+
+impl<T> Deref for WorkerLocal<T> {
+    type Target = T;
+
+    #[inline(always)]
+    fn deref(&self) -> &T {
+        if self.single_thread {
+            self.inner.as_ref().unwrap()
+        } else {
+            self.mt_inner.as_ref().unwrap().deref()
+        }
+    }
+}
+
+use std::thread;
+pub use worker_local::Registry;
+
 /// A type which only allows its inner value to be used in one thread.
 /// It will panic if it is used on multiple threads.
 #[derive(Debug)]
 pub struct OneThread<T> {
-    #[cfg(parallel_compiler)]
+    single_thread: bool,
     thread: thread::ThreadId,
     inner: T,
 }
 
-#[cfg(parallel_compiler)]
-unsafe impl<T> std::marker::Sync for OneThread<T> {}
-#[cfg(parallel_compiler)]
-unsafe impl<T> std::marker::Send for OneThread<T> {}
-
 impl<T> OneThread<T> {
     #[inline(always)]
     fn check(&self) {
-        #[cfg(parallel_compiler)]
-        assert_eq!(thread::current().id(), self.thread);
+        assert!(self.single_thread || thread::current().id() == self.thread);
     }
 
     #[inline(always)]
     pub fn new(inner: T) -> Self {
-        OneThread {
-            #[cfg(parallel_compiler)]
-            thread: thread::current().id(),
-            inner,
-        }
+        OneThread { single_thread: !is_dyn_thread_safe(), thread: thread::current().id(), inner }
     }
 
     #[inline(always)]
diff --git a/compiler/rustc_data_structures/src/sync/worker_local.rs b/compiler/rustc_data_structures/src/sync/worker_local.rs
index bfb04ba8a73f4..6de44f01097c2 100644
--- a/compiler/rustc_data_structures/src/sync/worker_local.rs
+++ b/compiler/rustc_data_structures/src/sync/worker_local.rs
@@ -1,11 +1,10 @@
-use crate::sync::Lock;
+use parking_lot::Mutex;
 use std::cell::Cell;
 use std::cell::OnceCell;
 use std::ops::Deref;
 use std::ptr;
 use std::sync::Arc;
 
-#[cfg(parallel_compiler)]
 use {crate::cold_path, crate::sync::CacheAligned};
 
 /// A pointer to the `RegistryData` which uniquely identifies a registry.
@@ -21,7 +20,6 @@ impl RegistryId {
     ///
     /// Note that there's a race possible where the identifier in `THREAD_DATA` could be reused
    /// so this can succeed from a different registry.
-    #[cfg(parallel_compiler)]
    fn verify(self) -> usize {
        let (id, index) = THREAD_DATA.with(|data| (data.registry_id.get(), data.index.get()));
 
@@ -33,13 +31,14 @@ impl RegistryId {
     }
 }
 
+#[derive(Debug)]
 struct RegistryData {
     thread_limit: usize,
-    threads: Lock<usize>,
+    threads: Mutex<usize>,
 }
 
 /// Represents a list of threads which can access worker locals.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub struct Registry(Arc<RegistryData>);
 
 thread_local! {
@@ -65,7 +64,7 @@ thread_local! {
 impl Registry {
     /// Creates a registry which can hold up to `thread_limit` threads.
     pub fn new(thread_limit: usize) -> Self {
-        Registry(Arc::new(RegistryData { thread_limit, threads: Lock::new(0) }))
+        Registry(Arc::new(RegistryData { thread_limit, threads: Mutex::new(0) }))
     }
 
     /// Gets the registry associated with the current thread. Panics if there's no such registry.
@@ -105,12 +104,9 @@ impl Registry {
 /// Holds worker local values for each possible thread in a registry. You can only access the
 /// worker local value through the `Deref` impl on the registry associated with the thread it was
 /// created on. It will panic otherwise.
-pub struct WorkerLocal<T> {
-    #[cfg(not(parallel_compiler))]
-    local: T,
-    #[cfg(parallel_compiler)]
+#[derive(Debug)]
+pub(crate) struct WorkerLocal<T> {
     locals: Box<[CacheAligned<T>]>,
-    #[cfg(parallel_compiler)]
     registry: Registry,
 }
 
@@ -118,7 +114,6 @@ pub struct WorkerLocal<T> {
 // or it will panic for threads without an associated local. So there isn't a need for `T` to do
 // its own synchronization. The `verify` method on `RegistryId` has an issue where the id
 // can be reused, but `WorkerLocal` has a reference to `Registry` which will prevent any reuse.
-#[cfg(parallel_compiler)]
 unsafe impl<T> Sync for WorkerLocal<T> {}
 
 impl<T> WorkerLocal<T> {
@@ -126,52 +121,18 @@ impl<T> WorkerLocal<T> {
     /// Creates a new worker local where the `initial` closure computes the
     /// value this worker local should take for each thread in the registry.
     #[inline]
     pub fn new<F: FnMut(usize) -> T>(mut initial: F) -> WorkerLocal<T> {
-        #[cfg(parallel_compiler)]
-        {
-            let registry = Registry::current();
-            WorkerLocal {
-                locals: (0..registry.0.thread_limit).map(|i| CacheAligned(initial(i))).collect(),
-                registry,
-            }
-        }
-        #[cfg(not(parallel_compiler))]
-        {
-            WorkerLocal { local: initial(0) }
-        }
-    }
-
-    /// Returns the worker-local values for each thread
-    #[inline]
-    pub fn into_inner(self) -> impl Iterator<Item = T> {
-        #[cfg(parallel_compiler)]
-        {
-            self.locals.into_vec().into_iter().map(|local| local.0)
-        }
-        #[cfg(not(parallel_compiler))]
-        {
-            std::iter::once(self.local)
+        let registry = Registry::current();
+        WorkerLocal {
+            locals: (0..registry.0.thread_limit).map(|i| CacheAligned(initial(i))).collect(),
+            registry,
         }
     }
 }
 
-impl<T> WorkerLocal<Vec<T>> {
-    /// Joins the elements of all the worker locals into one Vec
-    pub fn join(self) -> Vec<T> {
-        self.into_inner().into_iter().flat_map(|v| v).collect()
-    }
-}
-
 impl<T> Deref for WorkerLocal<T> {
     type Target = T;
 
     #[inline(always)]
-    #[cfg(not(parallel_compiler))]
-    fn deref(&self) -> &T {
-        &self.local
-    }
-
-    #[inline(always)]
-    #[cfg(parallel_compiler)]
     fn deref(&self) -> &T {
         // This is safe because `verify` will only return values less than
         // `self.registry.thread_limit` which is the size of the `self.locals` array.
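All of the dual-mode primitives above branch on one process-global flag, written once before any `Lock`, `RwLock`, or `WorkerLocal` is created. A standalone restatement of the `mode` module added in sync.rs (constants as in the diff; the `main` usage is illustrative):

    use std::sync::atomic::{AtomicU8, Ordering};

    const DYN_NOT_THREAD_SAFE: u8 = 1;
    const DYN_THREAD_SAFE: u8 = 2;

    // Defaults to single-thread mode unless `-Z threads` opts in at startup.
    static DYN_THREAD_SAFE_MODE: AtomicU8 = AtomicU8::new(DYN_NOT_THREAD_SAFE);

    #[inline]
    pub fn is_dyn_thread_safe() -> bool {
        DYN_THREAD_SAFE_MODE.load(Ordering::Relaxed) == DYN_THREAD_SAFE
    }

    pub fn set_dyn_thread_safe_mode(mode: bool) {
        let set = if mode { DYN_THREAD_SAFE } else { DYN_NOT_THREAD_SAFE };
        DYN_THREAD_SAFE_MODE.store(set, Ordering::Relaxed);
    }

    fn main() {
        assert!(!is_dyn_thread_safe());
        set_dyn_thread_safe_mode(true); // what `-Z threads=8` triggers
        assert!(is_dyn_thread_safe());
    }

`Relaxed` ordering suffices because the flag is set during startup, before any thread that reads it exists; the `compare_exchange` guard removed above ("just for speed testing") previously enforced exactly that write-once discipline.
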
diff --git a/compiler/rustc_driver_impl/src/lib.rs b/compiler/rustc_driver_impl/src/lib.rs
index 80a9dfd251a79..6e7649587e12f 100644
--- a/compiler/rustc_driver_impl/src/lib.rs
+++ b/compiler/rustc_driver_impl/src/lib.rs
@@ -257,7 +257,7 @@ fn run_compiler(
     let sopts = config::build_session_options(&matches);
 
     // Set parallel mode before thread pool creation, which will create `Lock`s.
-    interface::set_thread_safe_mode(&sopts.unstable_opts);
+    interface::set_parallel_mode(&sopts.unstable_opts, &sopts.cg);
 
     if let Some(ref code) = matches.opt_str("explain") {
         handle_explain(diagnostics_registry(), code, sopts.error_format);
diff --git a/compiler/rustc_interface/src/interface.rs b/compiler/rustc_interface/src/interface.rs
index c9e857141c92a..c8450659cf0e6 100644
--- a/compiler/rustc_interface/src/interface.rs
+++ b/compiler/rustc_interface/src/interface.rs
@@ -61,8 +61,18 @@ impl Compiler {
 }
 
 #[allow(rustc::bad_opt_access)]
-pub fn set_thread_safe_mode(sopts: &config::UnstableOptions) {
-    rustc_data_structures::sync::set_dyn_thread_safe_mode(sopts.threads > 1);
+pub fn set_parallel_mode(unstable_opts: &config::UnstableOptions, cg_opts: &config::CodegenOptions) {
+    let parallel = if unstable_opts.threads <= 1 {
+        false
+    } else if let Some(path) = &cg_opts.incremental {
+        // Only enable parallel mode when the incremental directory does not
+        // exist yet, i.e. `try_exists` returns `Ok(false)`.
+        matches!(std::fs::try_exists(PathBuf::from(path)), Ok(false))
+    } else {
+        true
+    };
+
+    rustc_data_structures::sync::set_dyn_thread_safe_mode(parallel);
 }
 
 /// Converts strings provided as `--cfg [cfgspec]` into a `crate_cfg`.
diff --git a/compiler/rustc_interface/src/lib.rs b/compiler/rustc_interface/src/lib.rs
index 51bd8381e93d7..9ca94a52fddc3 100644
--- a/compiler/rustc_interface/src/lib.rs
+++ b/compiler/rustc_interface/src/lib.rs
@@ -4,6 +4,7 @@
 #![feature(thread_spawn_unchecked)]
 #![feature(lazy_cell)]
 #![feature(try_blocks)]
+#![feature(fs_try_exists)]
 #![recursion_limit = "256"]
 #![allow(rustc::potential_query_instability)]
 #![deny(rustc::untranslatable_diagnostic)]
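The rule encoded in `set_parallel_mode` above: `-Z threads > 1` is necessary, and a `-C incremental` directory that already exists switches parallelism back off (only `Ok(false)` from `try_exists` keeps it on). A standalone restatement under those assumptions — the function name is hypothetical, and the stable `Path::exists` stands in for the unstable `fs_try_exists` probe, so I/O errors count as "exists" slightly differently here:

    use std::path::Path;

    // Hypothetical restatement; not the PR's function.
    fn should_enable_parallel(threads: usize, incremental: Option<&Path>) -> bool {
        threads > 1 && incremental.map_or(true, |dir| !dir.exists())
    }

    fn main() {
        assert!(!should_enable_parallel(1, None));                 // single thread
        assert!(should_enable_parallel(8, None));                  // no incremental dir
        assert!(!should_enable_parallel(8, Some(Path::new("/")))); // dir exists
    }
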
diff --git a/compiler/rustc_interface/src/util.rs b/compiler/rustc_interface/src/util.rs
index 8d37b1053d800..1b2bb1a848c8a 100644
--- a/compiler/rustc_interface/src/util.rs
+++ b/compiler/rustc_interface/src/util.rs
@@ -6,6 +6,7 @@ use rustc_codegen_ssa::traits::CodegenBackend;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 #[cfg(parallel_compiler)]
 use rustc_data_structures::sync;
+use rustc_data_structures::sync::FromDyn;
 use rustc_errors::registry::Registry;
 use rustc_parse::validate_attr;
 use rustc_session as session;
@@ -128,12 +129,15 @@ fn get_stack_size() -> Option<usize> {
     env::var_os("RUST_MIN_STACK").is_none().then_some(STACK_SIZE)
 }
 
-#[cfg(not(parallel_compiler))]
 pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce() -> R + Send, R: Send>(
     edition: Edition,
     _threads: usize,
     f: F,
 ) -> R {
+    #[cfg(parallel_compiler)]
+    if rustc_data_structures::sync::is_dyn_thread_safe() {
+        return run_in_threads_pool_with_globals(edition, _threads, f);
+    }
     // The "thread pool" is a single spawned thread in the non-parallel
     // compiler. We run on a spawned thread instead of the main thread (a) to
     // provide control over the stack size, and (b) to increase similarity with
@@ -163,7 +167,7 @@ pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce() -> R + Send, R: Send>(
 }
 
 #[cfg(parallel_compiler)]
-pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce() -> R + Send, R: Send>(
+pub(crate) fn run_in_threads_pool_with_globals<F: FnOnce() -> R + Send, R: Send>(
     edition: Edition,
     threads: usize,
     f: F,
@@ -200,6 +204,7 @@ pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce() -> R + Send, R: Send>(
         // `Send` in the parallel compiler.
         rustc_span::create_session_globals_then(edition, || {
             rustc_span::with_session_globals(|session_globals| {
+                let session_globals = FromDyn::from(session_globals);
                 builder
                     .build_scoped(
                         // Initialize each new worker thread when created.
@@ -207,7 +212,9 @@ pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce() -> R + Send, R: Send>(
                             // Register the thread for use with the `WorkerLocal` type.
                             registry.register();
 
-                            rustc_span::set_session_globals_then(session_globals, || thread.run())
+                            rustc_span::set_session_globals_then(session_globals.into_inner(), || {
+                                thread.run()
+                            })
                         },
                         // Run `f` on the first thread in the thread pool.
                         move |pool: &rayon::ThreadPool| pool.install(f),
diff --git a/compiler/rustc_log/src/lib.rs b/compiler/rustc_log/src/lib.rs
index 3cbb2c21e289e..e3d8a31c09080 100644
--- a/compiler/rustc_log/src/lib.rs
+++ b/compiler/rustc_log/src/lib.rs
@@ -82,8 +82,6 @@ pub fn init_env_logger(env: &str) -> Result<(), Error> {
         .with_verbose_exit(verbose_entry_exit)
         .with_verbose_entry(verbose_entry_exit)
         .with_indent_amount(2);
-    #[cfg(all(parallel_compiler, debug_assertions))]
-    let layer = layer.with_thread_ids(true).with_thread_names(true);
 
     let subscriber = tracing_subscriber::Registry::default().with(filter).with(layer);
     match env::var(format!("{env}_BACKTRACE")) {
diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs
index 79eb48a1a3155..f663f24dfb76e 100644
--- a/compiler/rustc_metadata/src/rmeta/encoder.rs
+++ b/compiler/rustc_metadata/src/rmeta/encoder.rs
@@ -2233,7 +2233,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
     join(
         || encode_metadata_impl(tcx, path),
         || {
-            if tcx.sess.threads() == 1 {
+            if !rustc_data_structures::sync::is_dyn_thread_safe() {
                 return;
             }
             // Prefetch some queries used by metadata encoding.
diff --git a/compiler/rustc_middle/src/query/mod.rs b/compiler/rustc_middle/src/query/mod.rs
index 21c69662b9ead..e14aec9772ba2 100644
--- a/compiler/rustc_middle/src/query/mod.rs
+++ b/compiler/rustc_middle/src/query/mod.rs
@@ -54,6 +54,7 @@ use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_attr as attr;
 use rustc_data_structures::fingerprint::Fingerprint;
 use rustc_data_structures::fx::{FxHashMap, FxIndexMap, FxIndexSet};
+use rustc_data_structures::sharded::{Shard, Sharded, SingleShard};
 use rustc_data_structures::steal::Steal;
 use rustc_data_structures::svh::Svh;
 use rustc_data_structures::sync::Lrc;
@@ -80,6 +81,7 @@ use rustc_span::symbol::Symbol;
 use rustc_span::{Span, DUMMY_SP};
 use rustc_target::abi;
 use rustc_target::spec::PanicStrategy;
+use std::intrinsics::likely;
 use std::mem;
 use std::ops::Deref;
 use std::path::PathBuf;
diff --git a/compiler/rustc_middle/src/query/plumbing.rs b/compiler/rustc_middle/src/query/plumbing.rs
index 647f4826876da..d7618b971e737 100644
--- a/compiler/rustc_middle/src/query/plumbing.rs
+++ b/compiler/rustc_middle/src/query/plumbing.rs
@@ -10,7 +10,8 @@ use crate::ty::TyCtxt;
 use field_offset::FieldOffset;
 use measureme::StringId;
 use rustc_data_structures::fx::FxHashMap;
-use rustc_data_structures::sync::AtomicU64;
+use rustc_data_structures::sharded::{Sharded, SingleShard};
+use rustc_data_structures::sync::{AtomicU64, DynSync};
 use rustc_hir::def::DefKind;
 use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_hir::hir_id::OwnerId;
@@ -40,13 +41,14 @@ pub struct QueryStruct<'tcx> {
         Option<fn(TyCtxt<'tcx>, &mut CacheEncoder<'_, 'tcx>, &mut EncodedDepNodeIndex)>,
 }
 
-pub struct DynamicQuery<'tcx, C: QueryCache> {
+pub struct DynamicQuery<'tcx, C: QueryCache, C2: QueryCache> {
     pub name: &'static str,
     pub eval_always: bool,
     pub dep_kind: rustc_middle::dep_graph::DepKind,
     pub handle_cycle_error: HandleCycleError,
     pub query_state: FieldOffset<QueryStates<'tcx>, QueryState<C::Key, DepKind>>,
-    pub query_cache: FieldOffset<QueryCaches<'tcx>, C>,
+    pub single_query_cache: FieldOffset<QueryCaches<'tcx, SingleShard>, C>,
+    pub parallel_query_cache: FieldOffset<QueryCaches<'tcx, Sharded>, C2>,
     pub cache_on_disk: fn(tcx: TyCtxt<'tcx>, key: &C::Key) -> bool,
     pub execute_query: fn(tcx: TyCtxt<'tcx>, k: C::Key) -> C::Value,
     pub compute: fn(tcx: TyCtxt<'tcx>, key: C::Key) -> C::Value,
@@ -81,9 +83,12 @@ pub struct QuerySystemFns<'tcx> {
 pub struct QuerySystem<'tcx> {
     pub states: QueryStates<'tcx>,
     pub arenas: QueryArenas<'tcx>,
-    pub caches: QueryCaches<'tcx>,
     pub dynamic_queries: DynamicQueries<'tcx>,
 
+    pub single_thread: bool,
+    pub single_caches: QueryCaches<'tcx, SingleShard>,
+    pub parallel_caches: QueryCaches<'tcx, Sharded>,
+
     /// This provides access to the incremental compilation on-disk cache for query results.
     /// Do not access this directly. It is only meant to be used by
     /// `DepGraph::try_mark_green()` and the query infrastructure.
@@ -95,6 +100,9 @@ pub struct QuerySystem<'tcx> {
     pub jobs: AtomicU64,
 }
 
+#[cfg(parallel_compiler)]
+unsafe impl<'tcx> DynSync for QuerySystem<'tcx> {}
+
 #[derive(Copy, Clone)]
 pub struct TyCtxtAt<'tcx> {
     pub tcx: TyCtxt<'tcx>,
@@ -321,7 +329,7 @@ macro_rules! define_callbacks {
             use super::*;
 
             $(
-                pub type $name<'tcx> = <<$($K)* as Key>::CacheSelector as CacheSelector<'tcx, Erase<$V>>>::Cache;
+                pub type $name<'tcx, S> = <<$($K)* as Key>::CacheSelector as CacheSelector<'tcx, Erase<$V>, S>>::Cache;
            )*
        }
 
@@ -374,21 +382,31 @@ macro_rules! define_callbacks {
         }
 
         #[derive(Default)]
-        pub struct QueryCaches<'tcx> {
-            $($(#[$attr])* pub $name: query_storage::$name<'tcx>,)*
+        pub struct QueryCaches<'tcx, S: Shard> {
+            $($(#[$attr])* pub $name: query_storage::$name<'tcx, S>,)*
         }
 
         impl<'tcx> TyCtxtEnsure<'tcx> {
             $($(#[$attr])*
             #[inline(always)]
             pub fn $name(self, key: query_helper_param_ty!($($K)*)) {
-                query_ensure(
-                    self.tcx,
-                    self.tcx.query_system.fns.engine.$name,
-                    &self.tcx.query_system.caches.$name,
-                    key.into_query_param(),
-                    false,
-                );
+                if likely(self.tcx.query_system.single_thread) {
+                    query_ensure(
+                        self.tcx,
+                        self.tcx.query_system.fns.engine.$name,
+                        &self.tcx.query_system.single_caches.$name,
+                        key.into_query_param(),
+                        false,
+                    )
+                } else {
+                    query_ensure(
+                        self.tcx,
+                        self.tcx.query_system.fns.engine.$name,
+                        &self.tcx.query_system.parallel_caches.$name,
+                        key.into_query_param(),
+                        false,
+                    )
+                };
            })*
        }
 
@@ -396,13 +414,23 @@ macro_rules! define_callbacks {
             $($(#[$attr])*
             #[inline(always)]
             pub fn $name(self, key: query_helper_param_ty!($($K)*)) {
-                query_ensure(
-                    self.tcx,
-                    self.tcx.query_system.fns.engine.$name,
-                    &self.tcx.query_system.caches.$name,
-                    key.into_query_param(),
-                    true,
-                );
+                if likely(self.tcx.query_system.single_thread) {
+                    query_ensure(
+                        self.tcx,
+                        self.tcx.query_system.fns.engine.$name,
+                        &self.tcx.query_system.single_caches.$name,
+                        key.into_query_param(),
+                        true,
+                    );
+                } else {
+                    query_ensure(
+                        self.tcx,
+                        self.tcx.query_system.fns.engine.$name,
+                        &self.tcx.query_system.parallel_caches.$name,
+                        key.into_query_param(),
+                        true,
+                    );
+                };
            })*
        }
 
@@ -421,19 +449,29 @@ macro_rules! define_callbacks {
             #[inline(always)]
             pub fn $name(self, key: query_helper_param_ty!($($K)*)) -> $V
             {
-                restore::<$V>(query_get_at(
-                    self.tcx,
-                    self.tcx.query_system.fns.engine.$name,
-                    &self.tcx.query_system.caches.$name,
-                    self.span,
-                    key.into_query_param(),
-                ))
+                if likely(self.tcx.query_system.single_thread) {
+                    restore::<$V>(query_get_at(
+                        self.tcx,
+                        self.tcx.query_system.fns.engine.$name,
+                        &self.tcx.query_system.single_caches.$name,
+                        self.span,
+                        key.into_query_param(),
+                    ))
+                } else {
+                    restore::<$V>(query_get_at(
+                        self.tcx,
+                        self.tcx.query_system.fns.engine.$name,
+                        &self.tcx.query_system.parallel_caches.$name,
+                        self.span,
+                        key.into_query_param(),
+                    ))
+                }
            })*
        }
 
         pub struct DynamicQueries<'tcx> {
             $(
-                pub $name: DynamicQuery<'tcx, query_storage::$name<'tcx>>,
+                pub $name: DynamicQuery<'tcx, query_storage::$name<'tcx, SingleShard>, query_storage::$name<'tcx, Sharded>>,
            )*
        }
 
@@ -523,38 +561,75 @@ macro_rules! define_feedable {
             let tcx = self.tcx;
             let erased = query_provided_to_value::$name(tcx, value);
             let value = restore::<$V>(erased);
-            let cache = &tcx.query_system.caches.$name;
-
-            let hasher: Option<fn(&mut StableHashingContext<'_>, &_) -> _> = hash_result!([$($modifiers)*]);
-            match try_get_cached(tcx, cache, &key) {
-                Some(old) => {
-                    let old = restore::<$V>(old);
-                    if let Some(hasher) = hasher {
-                        let (value_hash, old_hash): (Fingerprint, Fingerprint) = tcx.with_stable_hashing_context(|mut hcx|
-                            (hasher(&mut hcx, &value), hasher(&mut hcx, &old))
+            if likely(tcx.query_system.single_thread) {
+                let cache = &tcx.query_system.single_caches.$name;
+
+                let hasher: Option<fn(&mut StableHashingContext<'_>, &_) -> _> = hash_result!([$($modifiers)*]);
+                match try_get_cached(tcx, cache, &key) {
+                    Some(old) => {
+                        let old = restore::<$V>(old);
+                        if let Some(hasher) = hasher {
+                            let (value_hash, old_hash): (Fingerprint, Fingerprint) = tcx.with_stable_hashing_context(|mut hcx|
+                                (hasher(&mut hcx, &value), hasher(&mut hcx, &old))
                             );
-                        assert_eq!(
-                            old_hash, value_hash,
-                            "Trying to feed an already recorded value for query {} key={key:?}:\nold value: {old:?}\nnew value: {value:?}",
-                            stringify!($name),
-                        )
-                    } else {
-                        bug!(
-                            "Trying to feed an already recorded value for query {} key={key:?}:\nold value: {old:?}\nnew value: {value:?}",
-                            stringify!($name),
-                        )
+                            assert_eq!(
+                                old_hash, value_hash,
+                                "Trying to feed an already recorded value for query {} key={key:?}:\nold value: {old:?}\nnew value: {value:?}",
+                                stringify!($name),
+                            )
+                        } else {
+                            bug!(
+                                "Trying to feed an already recorded value for query {} key={key:?}:\nold value: {old:?}\nnew value: {value:?}",
+                                stringify!($name),
+                            )
+                        }
+                    }
+                    None => {
+                        let dep_node = dep_graph::DepNode::construct(tcx, dep_graph::DepKind::$name, &key);
+                        let dep_node_index = tcx.dep_graph.with_feed_task(
+                            dep_node,
+                            tcx,
+                            key,
+                            &value,
+                            hash_result!([$($modifiers)*]),
+                        );
+                        cache.complete(key, erased, dep_node_index);
                     }
                 }
-                None => {
-                    let dep_node = dep_graph::DepNode::construct(tcx, dep_graph::DepKind::$name, &key);
-                    let dep_node_index = tcx.dep_graph.with_feed_task(
-                        dep_node,
-                        tcx,
-                        key,
-                        &value,
-                        hash_result!([$($modifiers)*]),
-                    );
-                    cache.complete(key, erased, dep_node_index);
+            } else {
+                let cache = &tcx.query_system.parallel_caches.$name;
+
+                let hasher: Option<fn(&mut StableHashingContext<'_>, &_) -> _> = hash_result!([$($modifiers)*]);
+                match try_get_cached(tcx, cache, &key) {
+                    Some(old) => {
+                        let old = restore::<$V>(old);
+                        if let Some(hasher) = hasher {
+                            let (value_hash, old_hash): (Fingerprint, Fingerprint) = tcx.with_stable_hashing_context(|mut hcx|
+                                (hasher(&mut hcx, &value), hasher(&mut hcx, &old))
+                            );
+                            assert_eq!(
+                                old_hash, value_hash,
+                                "Trying to feed an already recorded value for query {} key={key:?}:\nold value: {old:?}\nnew value: {value:?}",
+                                stringify!($name),
+                            )
+                        } else {
+                            bug!(
+                                "Trying to feed an already recorded value for query {} key={key:?}:\nold value: {old:?}\nnew value: {value:?}",
+                                stringify!($name),
+                            )
+                        }
+                    }
+                    None => {
+                        let dep_node = dep_graph::DepNode::construct(tcx, dep_graph::DepKind::$name, &key);
+                        let dep_node_index = tcx.dep_graph.with_feed_task(
+                            dep_node,
+                            tcx,
+                            key,
+                            &value,
+                            hash_result!([$($modifiers)*]),
+                        );
+                        cache.complete(key, erased, dep_node_index);
+                    }
                 }
             }
         }
diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs
index e84d0100a5cba..01cff1a3e66eb 100644
--- a/compiler/rustc_middle/src/ty/context.rs
+++ b/compiler/rustc_middle/src/ty/context.rs
@@ -1310,26 +1310,28 @@ macro_rules! sty_debug_print {
         };
         $(let mut $variant = total;)*
 
-        let shards = tcx.interners.type_.lock_shards();
-        let types = shards.iter().flat_map(|shard| shard.keys());
-        for &InternedInSet(t) in types {
-            let variant = match t.internee {
-                ty::Bool | ty::Char | ty::Int(..) | ty::Uint(..) |
-                ty::Float(..) | ty::Str | ty::Never => continue,
-                ty::Error(_) => /* unimportant */ continue,
-                $(ty::$variant(..) => &mut $variant,)*
-            };
-            let lt = t.flags.intersects(ty::TypeFlags::HAS_RE_INFER);
-            let ty = t.flags.intersects(ty::TypeFlags::HAS_TY_INFER);
-            let ct = t.flags.intersects(ty::TypeFlags::HAS_CT_INFER);
-
-            variant.total += 1;
-            total.total += 1;
-            if lt { total.lt_infer += 1; variant.lt_infer += 1 }
-            if ty { total.ty_infer += 1; variant.ty_infer += 1 }
-            if ct { total.ct_infer += 1; variant.ct_infer += 1 }
-            if lt && ty && ct { total.all_infer += 1; variant.all_infer += 1 }
-        }
+        tcx.interners.type_.with_lock_shards(|shard| {
+            let types = shard.keys();
+            for &InternedInSet(t) in types {
+                let variant = match t.internee {
+                    ty::Bool | ty::Char | ty::Int(..) | ty::Uint(..) |
+                    ty::Float(..) | ty::Str | ty::Never => continue,
+                    ty::Error(_) => /* unimportant */ continue,
+                    $(ty::$variant(..) => &mut $variant,)*
+                };
+                let lt = t.flags.intersects(ty::TypeFlags::HAS_RE_INFER);
+                let ty = t.flags.intersects(ty::TypeFlags::HAS_TY_INFER);
+                let ct = t.flags.intersects(ty::TypeFlags::HAS_CT_INFER);
+
+                variant.total += 1;
+                total.total += 1;
+                if lt { total.lt_infer += 1; variant.lt_infer += 1 }
+                if ty { total.ty_infer += 1; variant.ty_infer += 1 }
+                if ct { total.ct_infer += 1; variant.ct_infer += 1 }
+                if lt && ty && ct { total.all_infer += 1; variant.all_infer += 1 }
+            }
+        });
+
         writeln!(fmt, "Ty interner             total           ty lt ct all")?;
         $(writeln!(fmt, "    {:18}: {uses:6} {usespc:4.1}%, \
             {ty:4.1}% {lt:5.1}% {ct:4.1}% {all:4.1}%",
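The pattern driving the `define_callbacks`/`define_feedable` changes above: every query entry point now carries one `likely(single_thread)` branch choosing between the `SingleShard` and `Sharded` instantiations of the same cache type. A reduced nightly sketch of the dispatch shape (stand-in struct and fields, not the real query system):

    #![feature(core_intrinsics)] // nightly, as the PR itself enables
    use std::intrinsics::likely;

    // Stand-in for QuerySystem with its two cache families.
    struct System {
        single_thread: bool,
        single_cache: Vec<u64>,
        parallel_cache: Vec<u64>,
    }

    impl System {
        #[inline(always)]
        fn lookup(&self, idx: usize) -> u64 {
            // One well-predicted branch selects the cache family; each arm
            // sees a concrete type, so downstream code monomorphizes
            // separately — which is the point of the extra `S` parameter.
            if likely(self.single_thread) {
                self.single_cache[idx]
            } else {
                self.parallel_cache[idx]
            }
        }
    }

    fn main() {
        let s = System { single_thread: true, single_cache: vec![7], parallel_cache: Vec::new() };
        assert_eq!(s.lookup(0), 7);
    }
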
diff --git a/compiler/rustc_middle/src/ty/fold.rs b/compiler/rustc_middle/src/ty/fold.rs
index 25890eb15cde4..e597d2bfcc8b7 100644
--- a/compiler/rustc_middle/src/ty/fold.rs
+++ b/compiler/rustc_middle/src/ty/fold.rs
@@ -21,6 +21,22 @@ where
     pub ct_op: H,
 }
 
+// FIXME: More information in .
+impl<'tcx, F, G, H> !Send for BottomUpFolder<'tcx, F, G, H>
+where
+    F: FnMut(Ty<'tcx>) -> Ty<'tcx>,
+    G: FnMut(ty::Region<'tcx>) -> ty::Region<'tcx>,
+    H: FnMut(ty::Const<'tcx>) -> ty::Const<'tcx>,
+{
+}
+impl<'tcx, F, G, H> !Sync for BottomUpFolder<'tcx, F, G, H>
+where
+    F: FnMut(Ty<'tcx>) -> Ty<'tcx>,
+    G: FnMut(ty::Region<'tcx>) -> ty::Region<'tcx>,
+    H: FnMut(ty::Const<'tcx>) -> ty::Const<'tcx>,
+{
+}
+
 impl<'tcx, F, G, H> TypeFolder<TyCtxt<'tcx>> for BottomUpFolder<'tcx, F, G, H>
 where
     F: FnMut(Ty<'tcx>) -> Ty<'tcx>,
diff --git a/compiler/rustc_query_impl/src/lib.rs b/compiler/rustc_query_impl/src/lib.rs
index b76734dd07294..b1d9d57429944 100644
--- a/compiler/rustc_query_impl/src/lib.rs
+++ b/compiler/rustc_query_impl/src/lib.rs
@@ -7,6 +7,7 @@
 #![feature(min_specialization)]
 #![feature(never_type)]
 #![feature(rustc_attrs)]
+#![feature(core_intrinsics)]
 #![recursion_limit = "256"]
 #![allow(rustc::potential_query_instability, unused_parens)]
 #![deny(rustc::untranslatable_diagnostic)]
@@ -17,10 +18,10 @@ extern crate rustc_middle;
 
 use crate::plumbing::{encode_all_query_results, try_mark_green};
 use field_offset::offset_of;
+use rustc_data_structures::sharded::{Shard, Sharded, SingleShard};
 use rustc_data_structures::stable_hasher::HashStable;
 use rustc_data_structures::sync::AtomicU64;
 use rustc_middle::arena::Arena;
-use rustc_middle::dep_graph::DepNodeIndex;
 use rustc_middle::dep_graph::{self, DepKind, DepKindStruct};
 use rustc_middle::query::erase::{erase, restore, Erase};
 use rustc_middle::query::on_disk_cache::OnDiskCache;
@@ -31,11 +32,11 @@ use rustc_middle::query::{
     DynamicQueries, ExternProviders, Providers, QueryCaches, QueryEngine, QueryStates,
 };
 use rustc_middle::ty::TyCtxt;
-use rustc_query_system::dep_graph::SerializedDepNodeIndex;
+use rustc_query_system::dep_graph::{DepNodeIndex, SerializedDepNodeIndex};
 use rustc_query_system::ich::StableHashingContext;
 use rustc_query_system::query::{
-    get_query_incr, get_query_non_incr, HashResult, QueryCache, QueryConfig, QueryInfo, QueryMap,
-    QueryMode, QueryState,
+    get_query_incr, get_query_non_incr, HashResult, QueryCache, QueryConfig, QueryContext,
+    QueryInfo, QueryMap, QueryMode, QueryState,
 };
 use rustc_query_system::HandleCycleError;
 use rustc_query_system::Value;
@@ -51,33 +52,52 @@ pub use self::profiling_support::alloc_self_profile_query_strings;
 struct DynamicConfig<
     'tcx,
     C: QueryCache,
+    C2: QueryCache,
     const ANON: bool,
     const DEPTH_LIMIT: bool,
     const FEEDABLE: bool,
 > {
-    dynamic: &'tcx DynamicQuery<'tcx, C>,
+    dynamic: &'tcx DynamicQuery<'tcx, C, C2>,
 }
 
-impl<'tcx, C: QueryCache, const ANON: bool, const DEPTH_LIMIT: bool, const FEEDABLE: bool> Copy
-    for DynamicConfig<'tcx, C, ANON, DEPTH_LIMIT, FEEDABLE>
+impl<
+    'tcx,
+    C: QueryCache,
+    C2: QueryCache,
+    const ANON: bool,
+    const DEPTH_LIMIT: bool,
+    const FEEDABLE: bool,
+> Copy for DynamicConfig<'tcx, C, C2, ANON, DEPTH_LIMIT, FEEDABLE>
 {
 }
 
-impl<'tcx, C: QueryCache, const ANON: bool, const DEPTH_LIMIT: bool, const FEEDABLE: bool> Clone
-    for DynamicConfig<'tcx, C, ANON, DEPTH_LIMIT, FEEDABLE>
+impl<
+    'tcx,
+    C: QueryCache,
+    C2: QueryCache,
+    const ANON: bool,
+    const DEPTH_LIMIT: bool,
+    const FEEDABLE: bool,
+> Clone for DynamicConfig<'tcx, C, C2, ANON, DEPTH_LIMIT, FEEDABLE>
 {
     fn clone(&self) -> Self {
         DynamicConfig { dynamic: self.dynamic }
     }
 }
 
-impl<'tcx, C: QueryCache, const ANON: bool, const DEPTH_LIMIT: bool, const FEEDABLE: bool>
-    QueryConfig<QueryCtxt<'tcx>> for DynamicConfig<'tcx, C, ANON, DEPTH_LIMIT, FEEDABLE>
+impl<
+    'tcx,
+    C: QueryCache,
+    C2: QueryCache,
+    const ANON: bool,
+    const DEPTH_LIMIT: bool,
+    const FEEDABLE: bool,
+> QueryConfig<QueryCtxt<'tcx>> for DynamicConfig<'tcx, C, C2, ANON, DEPTH_LIMIT, FEEDABLE>
 where
     for<'a> C::Key: HashStable<StableHashingContext<'a>>,
 {
     type Key = C::Key;
     type Value = C::Value;
-    type Cache = C;
+    type Cache = C;
 
     #[inline(always)]
     fn name(self) -> &'static str {
@@ -98,11 +118,49 @@ where
     }
 
     #[inline(always)]
-    fn query_cache<'a>(self, qcx: QueryCtxt<'tcx>) -> &'a Self::Cache
-    where
-        'tcx: 'a,
-    {
-        self.dynamic.query_cache.apply(&qcx.tcx.query_system.caches)
+    fn look_up(self, qcx: QueryCtxt<'tcx>, key: &Self::Key) -> Option<(Self::Value, DepNodeIndex)> {
+        if qcx.single_thread() {
+            self.dynamic.single_query_cache.apply(&qcx.tcx.query_system.single_caches).lookup(key)
+        } else {
+            self.dynamic
+                .parallel_query_cache
+                .apply(&qcx.tcx.query_system.parallel_caches)
+                .lookup(key)
+        }
+    }
+
+    #[inline(always)]
+    fn cache_iter(
+        self,
+        qcx: QueryCtxt<'tcx>,
+        f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex),
+    ) {
+        if qcx.single_thread() {
+            self.dynamic.single_query_cache.apply(&qcx.tcx.query_system.single_caches).iter(f)
+        } else {
+            self.dynamic.parallel_query_cache.apply(&qcx.tcx.query_system.parallel_caches).iter(f)
+        }
+    }
+
+    #[inline(always)]
+    fn complete(
+        self,
+        qcx: QueryCtxt<'tcx>,
+        key: Self::Key,
+        value: Self::Value,
+        index: DepNodeIndex,
+    ) {
+        if qcx.single_thread() {
+            self.dynamic
+                .single_query_cache
+                .apply(&qcx.tcx.query_system.single_caches)
+                .complete(key, value, index)
+        } else {
+            self.dynamic
+                .parallel_query_cache
+                .apply(&qcx.tcx.query_system.parallel_caches)
+                .complete(key, value, index)
+        }
     }
 
     #[inline(always)]
@@ -209,8 +267,11 @@ pub fn query_system<'tcx>(
     QuerySystem {
         states: Default::default(),
         arenas: Default::default(),
-        caches: Default::default(),
         dynamic_queries: dynamic_queries(),
+
+        single_thread: !rustc_data_structures::sync::is_dyn_thread_safe(),
+        single_caches: Default::default(),
+        parallel_caches: Default::default(),
         on_disk_cache,
         fns: QuerySystemFns {
             engine: engine(incremental),
diff --git a/compiler/rustc_query_impl/src/plumbing.rs b/compiler/rustc_query_impl/src/plumbing.rs
index 79d8abc4b694c..2604212b4fd98 100644
--- a/compiler/rustc_query_impl/src/plumbing.rs
+++ b/compiler/rustc_query_impl/src/plumbing.rs
@@ -20,8 +20,7 @@ use rustc_middle::ty::{self, TyCtxt};
 use rustc_query_system::dep_graph::{DepNodeParams, HasDepContext};
 use rustc_query_system::ich::StableHashingContext;
 use rustc_query_system::query::{
-    force_query, QueryCache, QueryConfig, QueryContext, QueryJobId, QueryMap, QuerySideEffects,
-    QueryStackFrame,
+    force_query, QueryConfig, QueryContext, QueryJobId, QueryMap, QuerySideEffects, QueryStackFrame,
 };
 use rustc_query_system::{LayoutOfDepth, QueryOverflow};
 use rustc_serialize::Decodable;
@@ -63,6 +62,11 @@ impl<'tcx> HasDepContext for QueryCtxt<'tcx> {
 }
 
 impl QueryContext for QueryCtxt<'_> {
+    #[inline]
+    fn single_thread(self) -> bool {
+        self.query_system.single_thread
+    }
+
     #[inline]
     fn next_job_id(self) -> QueryJobId {
         QueryJobId(
@@ -354,8 +358,7 @@ pub(crate) fn encode_query_results<'a, 'tcx, Q>(
     qcx.profiler().verbose_generic_activity_with_arg("encode_query_results_for", query.name());
 
     assert!(query.query_state(qcx).all_inactive());
-    let cache = query.query_cache(qcx);
-    cache.iter(&mut |key, value, dep_node| {
+    query.cache_iter(qcx, &mut |key, value, dep_node: DepNodeIndex| {
         if query.cache_on_disk(qcx.tcx, &key) {
             let dep_node = SerializedDepNodeIndex::new(dep_node.index());
 
@@ -568,14 +571,15 @@ macro_rules! define_queries {
diff --git a/compiler/rustc_query_impl/src/plumbing.rs b/compiler/rustc_query_impl/src/plumbing.rs
index 79d8abc4b694c..2604212b4fd98 100644
--- a/compiler/rustc_query_impl/src/plumbing.rs
+++ b/compiler/rustc_query_impl/src/plumbing.rs
@@ -20,8 +20,7 @@ use rustc_middle::ty::{self, TyCtxt};
 use rustc_query_system::dep_graph::{DepNodeParams, HasDepContext};
 use rustc_query_system::ich::StableHashingContext;
 use rustc_query_system::query::{
-    force_query, QueryCache, QueryConfig, QueryContext, QueryJobId, QueryMap, QuerySideEffects,
-    QueryStackFrame,
+    force_query, QueryConfig, QueryContext, QueryJobId, QueryMap, QuerySideEffects, QueryStackFrame,
 };
 use rustc_query_system::{LayoutOfDepth, QueryOverflow};
 use rustc_serialize::Decodable;
@@ -63,6 +62,11 @@ impl<'tcx> HasDepContext for QueryCtxt<'tcx> {
 }
 
 impl QueryContext for QueryCtxt<'_> {
+    #[inline]
+    fn single_thread(self) -> bool {
+        self.query_system.single_thread
+    }
+
     #[inline]
     fn next_job_id(self) -> QueryJobId {
         QueryJobId(
@@ -354,8 +358,7 @@ pub(crate) fn encode_query_results<'a, 'tcx, Q>(
     qcx.profiler().verbose_generic_activity_with_arg("encode_query_results_for", query.name());
 
     assert!(query.query_state(qcx).all_inactive());
-    let cache = query.query_cache(qcx);
-    cache.iter(&mut |key, value, dep_node| {
+    query.cache_iter(qcx, &mut |key, value, dep_node: DepNodeIndex| {
         if query.cache_on_disk(qcx.tcx, &key) {
             let dep_node = SerializedDepNodeIndex::new(dep_node.index());
 
@@ -568,14 +571,15 @@ macro_rules! define_queries {
         use super::*;
 
         $(
-            pub(super) fn $name<'tcx>() -> DynamicQuery<'tcx, query_storage::$name<'tcx>> {
+            pub(super) fn $name<'tcx>() -> DynamicQuery<'tcx, query_storage::$name<'tcx, SingleShard>, query_storage::$name<'tcx, Sharded>> {
                 DynamicQuery {
                     name: stringify!($name),
                     eval_always: is_eval_always!([$($modifiers)*]),
                     dep_kind: dep_graph::DepKind::$name,
                     handle_cycle_error: handle_cycle_error!([$($modifiers)*]),
                     query_state: offset_of!(QueryStates<'tcx> => $name),
-                    query_cache: offset_of!(QueryCaches<'tcx> => $name),
+                    single_query_cache: offset_of!(QueryCaches<'tcx, SingleShard> => $name),
+                    parallel_query_cache: offset_of!(QueryCaches<'tcx, Sharded> => $name),
                     cache_on_disk: |tcx, key| ::rustc_middle::query::cached::$name(tcx, key),
                     execute_query: |tcx, key| erase(tcx.$name(key)),
                     compute: |tcx, key| query_provided_to_value::$name(
@@ -622,7 +626,8 @@ macro_rules! define_queries {
                 type RestoredValue = query_values::$name<'tcx>;
                 type Config = DynamicConfig<
                     'tcx,
-                    query_storage::$name<'tcx>,
+                    query_storage::$name<'tcx, SingleShard>,
+                    query_storage::$name<'tcx, Sharded>,
                     { is_anon!([$($modifiers)*]) },
                     { depth_limit!([$($modifiers)*]) },
                     { feedable!([$($modifiers)*]) },
@@ -754,12 +759,21 @@ macro_rules! define_queries {
                     )
                 },
                 alloc_self_profile_query_strings: |tcx, string_cache| {
-                    $crate::profiling_support::alloc_self_profile_query_strings_for_query_cache(
-                        tcx,
-                        stringify!($name),
-                        &tcx.query_system.caches.$name,
-                        string_cache,
-                    )
+                    if tcx.query_system.single_thread {
+                        $crate::profiling_support::alloc_self_profile_query_strings_for_query_cache(
+                            tcx,
+                            stringify!($name),
+                            &tcx.query_system.single_caches.$name,
+                            string_cache,
+                        )
+                    } else {
+                        $crate::profiling_support::alloc_self_profile_query_strings_for_query_cache(
+                            tcx,
+                            stringify!($name),
+                            &tcx.query_system.parallel_caches.$name,
+                            string_cache,
+                        )
+                    }
                 },
                 encode_query_results: expand_if_cached!([$($modifiers)*], |tcx, encoder, query_result_index|
                     $crate::plumbing::encode_query_results::<query_impl::$name::QueryType<'tcx>>(
diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs
index 8de4d06fe782b..96f8b08af6c74 100644
--- a/compiler/rustc_query_system/src/dep_graph/graph.rs
+++ b/compiler/rustc_query_system/src/dep_graph/graph.rs
@@ -2,7 +2,7 @@ use parking_lot::Mutex;
 use rustc_data_structures::fingerprint::Fingerprint;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_data_structures::profiling::{EventId, QueryInvocationId, SelfProfilerRef};
-use rustc_data_structures::sharded::{self, Sharded};
+use rustc_data_structures::sharded::{DynSharded, SHARDS};
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::steal::Steal;
 use rustc_data_structures::sync::{AtomicU32, AtomicU64, Lock, Lrc, Ordering};
@@ -631,10 +631,7 @@ impl<K: DepKind> DepGraphData<K> {
         } else {
             self.current
                 .new_node_to_index
-                .get_shard_by_value(dep_node)
-                .lock()
-                .get(dep_node)
-                .copied()
+                .with_get_shard_by_value(dep_node, |node| node.get(dep_node).copied())
         }
     }
 
@@ -698,8 +695,8 @@ impl<K: DepKind> DepGraph<K> {
     #[inline(always)]
     pub fn register_dep_node_debug_str<F>(&self, dep_node: DepNode<K>, debug_str_gen: F)
-    where
-        F: FnOnce() -> String,
+    where
+        F: FnOnce() -> String,
     {
         let dep_node_debug = &self.data.as_ref().unwrap().dep_node_debug;
 
@@ -1081,7 +1078,7 @@ rustc_index::newtype_index! {
 /// first, and `data` second.
 pub(super) struct CurrentDepGraph<K: DepKind> {
     encoder: Steal<GraphEncoder<K>>,
-    new_node_to_index: Sharded<FxHashMap<DepNode<K>, DepNodeIndex>>,
+    new_node_to_index: DynSharded<FxHashMap<DepNode<K>, DepNodeIndex>>,
     prev_index_to_index: Lock<IndexVec<SerializedDepNodeIndex, Option<DepNodeIndex>>>,
 
     /// This is used to verify that fingerprints do not change between the creation of a node
@@ -1149,7 +1146,11 @@ impl<K: DepKind> CurrentDepGraph<K> {
         // doesn't inadvertently increase.
         static_assert_size!(Option<DepNodeIndex>, 4);
 
-        let new_node_count_estimate = 102 * prev_graph_node_count / 100 + 200;
+        let mut new_node_count_estimate = 102 * prev_graph_node_count / 100 + 200;
+
+        if rustc_data_structures::sync::is_dyn_thread_safe() {
+            new_node_count_estimate /= SHARDS;
+        }
 
         let node_intern_event_id = profiler
             .get_or_alloc_cached_string("incr_comp_intern_dep_graph_node")
@@ -1162,11 +1163,8 @@ impl<K: DepKind> CurrentDepGraph<K> {
                 record_graph,
                 record_stats,
             )),
-            new_node_to_index: Sharded::new(|| {
-                FxHashMap::with_capacity_and_hasher(
-                    new_node_count_estimate / sharded::SHARDS,
-                    Default::default(),
-                )
+            new_node_to_index: DynSharded::new(|| {
+                FxHashMap::with_capacity_and_hasher(new_node_count_estimate, Default::default())
             }),
 
             prev_index_to_index: Lock::new(IndexVec::from_elem_n(None, prev_graph_node_count)),
             anon_id_seed,
@@ -1199,16 +1197,16 @@ impl<K: DepKind> CurrentDepGraph<K> {
         edges: EdgesVec,
         current_fingerprint: Fingerprint,
     ) -> DepNodeIndex {
-        let dep_node_index = match self.new_node_to_index.get_shard_by_value(&key).lock().entry(key)
-        {
-            Entry::Occupied(entry) => *entry.get(),
-            Entry::Vacant(entry) => {
-                let dep_node_index =
-                    self.encoder.borrow().send(profiler, key, current_fingerprint, edges);
-                entry.insert(dep_node_index);
-                dep_node_index
-            }
-        };
+        let dep_node_index =
+            self.new_node_to_index.with_get_shard_by_value(&key, |node| match node.entry(key) {
+                Entry::Occupied(entry) => *entry.get(),
+                Entry::Vacant(entry) => {
+                    let dep_node_index =
+                        self.encoder.borrow().send(profiler, key, current_fingerprint, edges);
+                    entry.insert(dep_node_index);
+                    dep_node_index
+                }
+            });
 
         #[cfg(debug_assertions)]
         self.record_edge(dep_node_index, key, current_fingerprint);
@@ -1327,7 +1325,7 @@ impl<K: DepKind> CurrentDepGraph<K> {
     ) {
         let node = &prev_graph.index_to_node(prev_index);
         debug_assert!(
-            !self.new_node_to_index.get_shard_by_value(node).lock().contains_key(node),
+            !self.new_node_to_index.with_get_shard_by_value(node, |lock| lock.contains_key(node)),
            "node from previous graph present in new node collection"
         );
     }
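The `with_get_shard_by_value` calls above pass a closure into the shard store instead of returning a guard, so the guard's lifetime stays inside the callee and the same call site compiles against either a `RefCell`-backed single shard or mutex-backed shards. A rough sketch of such a closure-based API (a hypothetical `DynSharded` stand-in, not the real one):

```rust
use std::cell::RefCell;
use std::collections::HashMap;
use std::sync::Mutex;

// Hypothetical stand-in for `DynSharded`: one RefCell in serial mode,
// a vector of Mutex-protected shards in parallel mode.
enum DynSharded<K, V> {
    Single(RefCell<HashMap<K, V>>),
    Sharded(Vec<Mutex<HashMap<K, V>>>),
}

impl<K: std::hash::Hash + Eq, V> DynSharded<K, V> {
    // The closure receives the locked shard; the guard never escapes,
    // so both locking disciplines fit behind one signature.
    fn with_get_shard_by_value<R>(&self, key: &K, f: impl FnOnce(&mut HashMap<K, V>) -> R) -> R {
        match self {
            DynSharded::Single(cell) => f(&mut cell.borrow_mut()),
            DynSharded::Sharded(shards) => {
                let mut hasher = std::collections::hash_map::DefaultHasher::new();
                std::hash::Hash::hash(key, &mut hasher);
                let idx = (std::hash::Hasher::finish(&hasher) as usize) % shards.len();
                f(&mut shards[idx].lock().unwrap())
            }
        }
    }
}

fn main() {
    // Serial flavor: a single RefCell shard.
    let single = DynSharded::Single(RefCell::new(HashMap::new()));
    single.with_get_shard_by_value(&1u32, |shard| shard.insert(1, "one"));
    assert_eq!(single.with_get_shard_by_value(&1, |shard| shard.get(&1).copied()), Some("one"));

    // Parallel flavor: the same call sites, different locking underneath.
    let sharded = DynSharded::Sharded((0..4).map(|_| Mutex::new(HashMap::new())).collect());
    sharded.with_get_shard_by_value(&2u32, |shard| shard.insert(2, "two"));
}
```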
diff --git a/compiler/rustc_query_system/src/query/caches.rs b/compiler/rustc_query_system/src/query/caches.rs
index 9a09f516ec920..295316a53f9cf 100644
--- a/compiler/rustc_query_system/src/query/caches.rs
+++ b/compiler/rustc_query_system/src/query/caches.rs
@@ -2,18 +2,18 @@ use crate::dep_graph::DepNodeIndex;
 
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sharded;
-#[cfg(parallel_compiler)]
-use rustc_data_structures::sharded::Sharded;
-use rustc_data_structures::sync::Lock;
+use rustc_data_structures::sharded::{Shard, ShardImpl};
+use rustc_data_structures::sync::LockLike;
 use rustc_index::{Idx, IndexVec};
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::marker::PhantomData;
 
-pub trait CacheSelector<'tcx, V> {
+pub trait CacheSelector<'tcx, V, S> {
     type Cache
     where
-        V: Copy;
+        V: Copy,
+        S: Shard;
 }
 
 pub trait QueryCache: Sized {
@@ -30,26 +30,23 @@
 
 pub struct DefaultCacheSelector<K>(PhantomData<K>);
 
-impl<'tcx, K: Eq + Hash, V: 'tcx> CacheSelector<'tcx, V> for DefaultCacheSelector<K> {
-    type Cache = DefaultCache<K, V>
+impl<'tcx, K: Eq + Hash, V: 'tcx, S: Shard> CacheSelector<'tcx, V, S> for DefaultCacheSelector<K> {
+    type Cache = DefaultCache<K, V, S>
     where
         V: Copy;
 }
 
-pub struct DefaultCache<K, V> {
-    #[cfg(parallel_compiler)]
-    cache: Sharded<FxHashMap<K, (V, DepNodeIndex)>>,
-    #[cfg(not(parallel_compiler))]
-    cache: Lock<FxHashMap<K, (V, DepNodeIndex)>>,
+pub struct DefaultCache<K, V, S: Shard> {
+    cache: S::Impl<FxHashMap<K, (V, DepNodeIndex)>>,
 }
 
-impl<K, V> Default for DefaultCache<K, V> {
+impl<K, V, S: Shard> Default for DefaultCache<K, V, S> {
     fn default() -> Self {
-        DefaultCache { cache: Default::default() }
+        DefaultCache { cache: S::Impl::new(|| FxHashMap::default()) }
     }
 }
 
-impl<K, V> QueryCache for DefaultCache<K, V>
+impl<K, V, S: Shard> QueryCache for DefaultCache<K, V, S>
 where
     K: Eq + Hash + Copy + Debug,
     V: Copy,
@@ -60,10 +57,9 @@ where
     #[inline(always)]
     fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
         let key_hash = sharded::make_hash(key);
-        #[cfg(parallel_compiler)]
+
         let lock = self.cache.get_shard_by_hash(key_hash).lock();
-        #[cfg(not(parallel_compiler))]
-        let lock = self.cache.lock();
+
         let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key);
 
         if let Some((_, value)) = result { Some(*value) } else { None }
     }
@@ -71,29 +67,17 @@ where
 
     #[inline]
     fn complete(&self, key: K, value: V, index: DepNodeIndex) {
-        #[cfg(parallel_compiler)]
         let mut lock = self.cache.get_shard_by_value(&key).lock();
-        #[cfg(not(parallel_compiler))]
-        let mut lock = self.cache.lock();
+
         // We may be overwriting another value. This is all right, since the dep-graph
         // will check that the fingerprint matches.
         lock.insert(key, (value, index));
     }
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-        #[cfg(parallel_compiler)]
-        {
-            let shards = self.cache.lock_shards();
-            for shard in shards.iter() {
-                for (k, v) in shard.iter() {
-                    f(k, &v.0, v.1);
-                }
-            }
-        }
-        #[cfg(not(parallel_compiler))]
-        {
-            let map = self.cache.lock();
-            for (k, v) in map.iter() {
+        let shards = self.cache.lock_shards();
+        for shard in shards.into_iter() {
+            for (k, v) in shard.iter() {
                 f(k, &v.0, v.1);
             }
         }
     }
 }
@@ -102,23 +86,23 @@ where
 
 pub struct SingleCacheSelector;
 
-impl<'tcx, V: 'tcx> CacheSelector<'tcx, V> for SingleCacheSelector {
-    type Cache = SingleCache<V>
+impl<'tcx, V: 'tcx, S: Shard> CacheSelector<'tcx, V, S> for SingleCacheSelector {
+    type Cache = SingleCache<V, S>
     where
         V: Copy;
 }
 
-pub struct SingleCache<V> {
-    cache: Lock<Option<(V, DepNodeIndex)>>,
+pub struct SingleCache<V, S: Shard> {
+    cache: <S::Impl<Option<(V, DepNodeIndex)>> as ShardImpl<Option<(V, DepNodeIndex)>>>::Lock,
 }
 
-impl<V> Default for SingleCache<V> {
+impl<V, S: Shard> Default for SingleCache<V, S> {
     fn default() -> Self {
-        SingleCache { cache: Lock::new(None) }
+        SingleCache { cache: <S::Impl<Option<(V, DepNodeIndex)>> as ShardImpl<Option<(V, DepNodeIndex)>>>::Lock::new(None) }
     }
 }
 
-impl<V> QueryCache for SingleCache<V>
+impl<V, S: Shard> QueryCache for SingleCache<V, S>
 where
     V: Copy,
 {
@@ -144,26 +128,23 @@ where
 
 pub struct VecCacheSelector<K>(PhantomData<K>);
 
-impl<'tcx, K: Idx, V: 'tcx> CacheSelector<'tcx, V> for VecCacheSelector<K> {
-    type Cache = VecCache<K, V>
+impl<'tcx, K: Idx, V: 'tcx, S: Shard> CacheSelector<'tcx, V, S> for VecCacheSelector<K> {
+    type Cache = VecCache<K, V, S>
     where
         V: Copy;
 }
 
-pub struct VecCache<K: Idx, V> {
-    #[cfg(parallel_compiler)]
-    cache: Sharded<IndexVec<K, Option<(V, DepNodeIndex)>>>,
-    #[cfg(not(parallel_compiler))]
-    cache: Lock<IndexVec<K, Option<(V, DepNodeIndex)>>>,
+pub struct VecCache<K: Idx, V, S: Shard> {
+    cache: S::Impl<IndexVec<K, Option<(V, DepNodeIndex)>>>,
 }
 
-impl<K: Idx, V> Default for VecCache<K, V> {
+impl<K: Idx, V, S: Shard> Default for VecCache<K, V, S> {
     fn default() -> Self {
-        VecCache { cache: Default::default() }
+        VecCache { cache: S::Impl::new(|| IndexVec::default()) }
     }
 }
 
-impl<K, V> QueryCache for VecCache<K, V>
+impl<K, V, S: Shard> QueryCache for VecCache<K, V, S>
 where
     K: Eq + Idx + Copy + Debug,
     V: Copy,
@@ -173,38 +154,22 @@ where
 
     #[inline(always)]
     fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
-        #[cfg(parallel_compiler)]
         let lock = self.cache.get_shard_by_hash(key.index() as u64).lock();
-        #[cfg(not(parallel_compiler))]
-        let lock = self.cache.lock();
+
         if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
     }
 
     #[inline]
     fn complete(&self, key: K, value: V, index: DepNodeIndex) {
-        #[cfg(parallel_compiler)]
         let mut lock = self.cache.get_shard_by_hash(key.index() as u64).lock();
-        #[cfg(not(parallel_compiler))]
-        let mut lock = self.cache.lock();
+
         lock.insert(key, (value, index));
     }
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-        #[cfg(parallel_compiler)]
-        {
-            let shards = self.cache.lock_shards();
-            for shard in shards.iter() {
-                for (k, v) in shard.iter_enumerated() {
-                    if let Some(v) = v {
-                        f(&k, &v.0, v.1);
-                    }
-                }
-            }
-        }
-        #[cfg(not(parallel_compiler))]
-        {
-            let map = self.cache.lock();
-            for (k, v) in map.iter_enumerated() {
+        let shards = self.cache.lock_shards();
+        for shard in shards.iter() {
+            for (k, v) in shard.iter_enumerated() {
                 if let Some(v) = v {
                     f(&k, &v.0, v.1);
                 }
             }
         }
     }
 }
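Threading the `S: Shard` parameter through `DefaultCache`, `SingleCache`, and `VecCache` is what deletes the `#[cfg(parallel_compiler)]` field pairs: the lock flavor becomes a type-level choice. A compressed sketch of a `LockLike`-style abstraction over `RefCell` and `Mutex` (simplified signatures, assumed rather than copied from the patch):

```rust
use std::cell::{RefCell, RefMut};
use std::marker::PhantomData;
use std::sync::{Mutex, MutexGuard};

// One interface over both lock disciplines; the patch's `LockLike` plays
// this role (this sketch assumes a reduced signature).
trait LockLike<T> {
    type LockGuard<'a>: std::ops::DerefMut<Target = T>
    where
        Self: 'a;
    fn new(value: T) -> Self;
    fn lock(&self) -> Self::LockGuard<'_>;
}

impl<T> LockLike<T> for RefCell<T> {
    type LockGuard<'a> = RefMut<'a, T> where Self: 'a;
    fn new(value: T) -> Self {
        RefCell::new(value)
    }
    fn lock(&self) -> RefMut<'_, T> {
        self.borrow_mut() // no atomics needed when only one thread exists
    }
}

impl<T> LockLike<T> for Mutex<T> {
    type LockGuard<'a> = MutexGuard<'a, T> where Self: 'a;
    fn new(value: T) -> Self {
        Mutex::new(value)
    }
    fn lock(&self) -> MutexGuard<'_, T> {
        self.lock().unwrap() // resolves to the inherent `Mutex::lock`
    }
}

// A cache generic over its lock: RefCell-backed in serial mode,
// Mutex-backed in parallel mode, with identical call sites.
struct Cache<T, L: LockLike<T>> {
    inner: L,
    _marker: PhantomData<T>,
}

fn main() {
    let serial: Cache<Vec<u32>, RefCell<Vec<u32>>> =
        Cache { inner: LockLike::new(Vec::new()), _marker: PhantomData };
    serial.inner.lock().push(1);
    assert_eq!(serial.inner.lock().len(), 1);
}
```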
diff --git a/compiler/rustc_query_system/src/query/config.rs b/compiler/rustc_query_system/src/query/config.rs
index 7e47d70120544..72bb673a91021 100644
--- a/compiler/rustc_query_system/src/query/config.rs
+++ b/compiler/rustc_query_system/src/query/config.rs
@@ -1,13 +1,13 @@
 //! Query configuration and description traits.
 
-use crate::dep_graph::{DepNode, DepNodeParams, SerializedDepNodeIndex};
+use crate::dep_graph::{DepNode, DepNodeIndex, DepNodeParams, SerializedDepNodeIndex};
 use crate::error::HandleCycleError;
 use crate::ich::StableHashingContext;
 use crate::query::caches::QueryCache;
-use crate::query::DepNodeIndex;
 use crate::query::{QueryContext, QueryInfo, QueryState};
 
 use rustc_data_structures::fingerprint::Fingerprint;
+use rustc_data_structures::sharded::Shard;
 use std::fmt::Debug;
 use std::hash::Hash;
 
@@ -21,17 +21,18 @@ pub trait QueryConfig<Qcx: QueryContext>: Copy {
     type Key: DepNodeParams<Qcx::DepContext> + Eq + Hash + Copy + Debug;
     type Value: Copy;
 
-    type Cache: QueryCache<Key = Self::Key, Value = Self::Value>;
+    type Cache: QueryCache;
 
     fn format_value(self) -> fn(&Self::Value) -> String;
 
-    // Don't use this method to access query results, instead use the methods on TyCtxt
-    fn query_state<'a>(self, tcx: Qcx) -> &'a QueryState<Self::Key, Qcx::DepKind>
-    where
-        Qcx: 'a;
+    fn look_up(self, qcx: Qcx, key: &Self::Key) -> Option<(Self::Value, DepNodeIndex)>;
+
+    fn cache_iter(self, qcx: Qcx, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex));
+
+    fn complete(self, qcx: Qcx, key: Self::Key, value: Self::Value, index: DepNodeIndex);
 
     // Don't use this method to access query results, instead use the methods on TyCtxt
-    fn query_cache<'a>(self, tcx: Qcx) -> &'a Self::Cache
+    fn query_state<'a>(self, tcx: Qcx) -> &'a QueryState<Self::Key, Qcx::DepKind>
     where
         Qcx: 'a;
diff --git a/compiler/rustc_query_system/src/query/job.rs b/compiler/rustc_query_system/src/query/job.rs
index 5f2ec656d1d1e..d731ef8121a44 100644
--- a/compiler/rustc_query_system/src/query/job.rs
+++ b/compiler/rustc_query_system/src/query/job.rs
@@ -20,7 +20,6 @@ use {
     parking_lot::{Condvar, Mutex},
     rayon_core,
     rustc_data_structures::fx::FxHashSet,
-    rustc_data_structures::sync::Lock,
     rustc_data_structures::sync::Lrc,
     rustc_data_structures::{jobserver, OnDrop},
     rustc_span::DUMMY_SP,
@@ -124,7 +123,6 @@ impl<D: DepKind> QueryJob<D> {
 }
 
 impl QueryJobId {
-    #[cfg(not(parallel_compiler))]
     pub(super) fn find_cycle_in_stack<D: DepKind>(
         &self,
         query_map: QueryMap<D>,
@@ -190,7 +188,7 @@ struct QueryWaiter<D: DepKind> {
     query: Option<QueryJobId>,
     condvar: Condvar,
     span: Span,
-    cycle: Lock<Option<CycleError<D>>>,
+    cycle: Mutex<Option<CycleError<D>>>,
 }
 
 #[cfg(parallel_compiler)]
@@ -228,7 +226,7 @@ impl<D: DepKind> QueryLatch<D> {
         span: Span,
     ) -> Result<(), CycleError<D>> {
         let waiter =
-            Lrc::new(QueryWaiter { query, span, cycle: Lock::new(None), condvar: Condvar::new() });
+            Lrc::new(QueryWaiter { query, span, cycle: Mutex::new(None), condvar: Condvar::new() });
         self.wait_on_inner(&waiter);
         // FIXME: Get rid of this lock. We have ownership of the QueryWaiter
         // although another thread may still have a Lrc reference so we cannot
@@ -613,7 +611,7 @@ pub(crate) fn report_cycle<'a, D: DepKind>(
 }
 
 pub fn print_query_stack<Qcx: QueryContext>(
-    qcx: Qcx,
+    _qcx: Qcx,
     mut current_query: Option<QueryJobId>,
     handler: &Handler,
     num_frames: Option<usize>,
@@ -622,7 +620,12 @@ pub fn print_query_stack<Qcx: QueryContext>(
     // a panic hook, which means that the global `Handler` may be in a weird
     // state if it was responsible for triggering the panic.
     let mut i = 0;
-    let query_map = qcx.try_collect_active_jobs();
+
+    #[cfg(not(parallel_compiler))]
+    let query_map = _qcx.try_collect_active_jobs();
+
+    #[cfg(parallel_compiler)]
+    let query_map: Option<QueryMap<Qcx::DepKind>> = None;
 
     while let Some(query) = current_query {
         if Some(i) == num_frames {
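`QueryWaiter` couples a `parking_lot::Mutex` slot (formerly rustc's `Lock`) with a `Condvar`: the completing or cycle-detecting thread deposits a result and signals the parked waiter. A bare-bones illustration of that rendezvous using std primitives (illustrative only, not the patch's code):

```rust
use std::sync::{Arc, Condvar, Mutex};
use std::thread;

// Illustrative waiter: `slot` plays the role of `QueryWaiter::cycle`.
struct Waiter {
    slot: Mutex<Option<String>>,
    condvar: Condvar,
}

fn main() {
    let waiter = Arc::new(Waiter { slot: Mutex::new(None), condvar: Condvar::new() });

    let signaller = Arc::clone(&waiter);
    let handle = thread::spawn(move || {
        // A completing thread deposits the result...
        *signaller.slot.lock().unwrap() = Some("query finished".to_string());
        // ...and wakes the parked thread.
        signaller.condvar.notify_one();
    });

    // The waiting thread parks until the slot is filled.
    let mut guard = waiter.slot.lock().unwrap();
    while guard.is_none() {
        guard = waiter.condvar.wait(guard).unwrap();
    }
    println!("{}", guard.take().unwrap());
    drop(guard);
    handle.join().unwrap();
}
```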
diff --git a/compiler/rustc_query_system/src/query/mod.rs b/compiler/rustc_query_system/src/query/mod.rs
index f7619d75be768..cc78ed871152d 100644
--- a/compiler/rustc_query_system/src/query/mod.rs
+++ b/compiler/rustc_query_system/src/query/mod.rs
@@ -101,6 +101,8 @@ impl QuerySideEffects {
 }
 
 pub trait QueryContext: HasDepContext {
+    fn single_thread(self) -> bool;
+
     fn next_job_id(self) -> QueryJobId;
 
     /// Get the query information from the TLS context.
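The new `QueryContext::single_thread` hook ultimately reflects a process-wide flag set once at startup (`is_dyn_thread_safe` in this patch). A sketch of such a flag, assuming a relaxed atomic is sufficient because it is written before any query can run (the setter name here is assumed, not from the patch):

```rust
use std::sync::atomic::{AtomicBool, Ordering};

// Sketch of a process-wide mode flag in the spirit of
// `rustc_data_structures::sync::is_dyn_thread_safe`.
static DYN_THREAD_SAFE: AtomicBool = AtomicBool::new(false);

// Called once, before any query can run, so relaxed ordering suffices.
pub fn set_dyn_thread_safe_mode(parallel: bool) {
    DYN_THREAD_SAFE.store(parallel, Ordering::Relaxed);
}

#[inline]
pub fn is_dyn_thread_safe() -> bool {
    DYN_THREAD_SAFE.load(Ordering::Relaxed)
}

fn main() {
    set_dyn_thread_safe_mode(true);
    assert!(is_dyn_thread_safe());
}
```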
diff --git a/compiler/rustc_query_system/src/query/plumbing.rs b/compiler/rustc_query_system/src/query/plumbing.rs
index dbfe62ae6e943..8b7ced1298c5d 100644
--- a/compiler/rustc_query_system/src/query/plumbing.rs
+++ b/compiler/rustc_query_system/src/query/plumbing.rs
@@ -12,12 +12,14 @@ use crate::query::job::{report_cycle, QueryInfo, QueryJob, QueryJobId, QueryJobI
 use crate::query::SerializedDepNodeIndex;
 use crate::query::{QueryContext, QueryMap, QuerySideEffects, QueryStackFrame};
 use crate::HandleCycleError;
+#[cfg(parallel_compiler)]
+use rustc_data_structures::cold_path;
 use rustc_data_structures::fingerprint::Fingerprint;
 use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::sharded::{DynSharded, Shard, ShardImpl, Sharded, SingleShard};
 use rustc_data_structures::stack::ensure_sufficient_stack;
-use rustc_data_structures::sync::Lock;
-#[cfg(parallel_compiler)]
-use rustc_data_structures::{cold_path, sharded::Sharded};
+use rustc_data_structures::sync::{Lock, LockLike};
+
 use rustc_errors::{DiagnosticBuilder, ErrorGuaranteed, FatalError};
 use rustc_span::{Span, DUMMY_SP};
 use std::cell::Cell;
@@ -25,15 +27,13 @@ use std::collections::hash_map::Entry;
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::mem;
+use std::ops::DerefMut;
 use thin_vec::ThinVec;
 
 use super::QueryConfig;
 
 pub struct QueryState<K, D: DepKind> {
-    #[cfg(parallel_compiler)]
-    active: Sharded<FxHashMap<K, QueryResult<D>>>,
-    #[cfg(not(parallel_compiler))]
-    active: Lock<FxHashMap<K, QueryResult<D>>>,
+    active: DynSharded<FxHashMap<K, QueryResult<D>>>,
 }
 
 /// Indicates the state of a query for a given key in a query map.
@@ -52,15 +52,8 @@ where
     D: DepKind,
 {
     pub fn all_inactive(&self) -> bool {
-        #[cfg(parallel_compiler)]
-        {
-            let shards = self.active.lock_shards();
-            shards.iter().all(|shard| shard.is_empty())
-        }
-        #[cfg(not(parallel_compiler))]
-        {
-            self.active.lock().is_empty()
-        }
+        let shards = self.active.with_lock_shards(|shard| shard.is_empty());
+        shards.into_iter().all(|empty| empty)
     }
 
     pub fn try_collect_active_jobs<Qcx: Copy>(
         &self,
         qcx: Qcx,
         make_query: fn(Qcx, K) -> QueryStackFrame<D>,
         jobs: &mut QueryMap<D>,
     ) -> Option<()> {
-        #[cfg(parallel_compiler)]
-        {
-            // We use try_lock_shards here since we are called from the
-            // deadlock handler, and this shouldn't be locked.
-            let shards = self.active.try_lock_shards()?;
-            for shard in shards.iter() {
-                for (k, v) in shard.iter() {
-                    if let QueryResult::Started(ref job) = *v {
-                        let query = make_query(qcx, *k);
-                        jobs.insert(job.id, QueryJobInfo { query, job: job.clone() });
-                    }
-                }
-            }
-        }
-        #[cfg(not(parallel_compiler))]
-        {
-            // We use try_lock here since we are called from the
-            // deadlock handler, and this shouldn't be locked.
-            // (FIXME: Is this relevant for non-parallel compilers? It doesn't
-            // really hurt much.)
-            for (k, v) in self.active.try_lock()?.iter() {
+        // We use try_lock_shards here since we are called from the
+        // deadlock handler, and this shouldn't be locked.
+        self.active.with_try_lock_shards(|shard| {
+            for (k, v) in shard.iter() {
                 if let QueryResult::Started(ref job) = *v {
                     let query = make_query(qcx, *k);
                     jobs.insert(job.id, QueryJobInfo { query, job: job.clone() });
                 }
             }
-        }
+        });
         Some(())
     }
@@ -168,9 +144,14 @@ where
 {
     /// Completes the query by updating the query cache with the `result`,
     /// signals the waiter and forgets the JobOwner, so it won't poison the query
-    fn complete<C>(self, cache: &C, result: C::Value, dep_node_index: DepNodeIndex)
-    where
-        C: QueryCache<Key = K>,
+    fn complete<C, Qcx>(
+        self,
+        qcx: Qcx,
+        query: C,
+        result: C::Value,
+        dep_node_index: DepNodeIndex,
+    ) where
+        C: QueryConfig<Qcx, Key = K>,
     {
         let key = self.key;
         let state = self.state;
 
         // Mark as complete before we remove the job from the active state
         // so no other thread can re-execute this query.
-        cache.complete(key, result, dep_node_index);
+        query.complete(qcx, key, result, dep_node_index);
 
         let job = {
-            #[cfg(parallel_compiler)]
-            let mut lock = state.active.get_shard_by_value(&key).lock();
-            #[cfg(not(parallel_compiler))]
-            let mut lock = state.active.lock();
-            match lock.remove(&key).unwrap() {
+            state.active.with_get_shard_by_value(&key, |lock| match lock.remove(&key).unwrap() {
                 QueryResult::Started(job) => job,
                 QueryResult::Poisoned => panic!(),
-            }
+            })
         };
 
         job.signal_complete();
@@ -208,16 +185,14 @@ where
         // Poison the query so jobs waiting on it panic.
         let state = self.state;
         let job = {
-            #[cfg(parallel_compiler)]
-            let mut shard = state.active.get_shard_by_value(&self.key).lock();
-            #[cfg(not(parallel_compiler))]
-            let mut shard = state.active.lock();
-            let job = match shard.remove(&self.key).unwrap() {
-                QueryResult::Started(job) => job,
-                QueryResult::Poisoned => panic!(),
-            };
-            shard.insert(self.key, QueryResult::Poisoned);
-            job
+            state.active.with_get_shard_by_value(&self.key, |shard| {
+                let job = match shard.remove(&self.key).unwrap() {
+                    QueryResult::Started(job) => job,
+                    QueryResult::Poisoned => panic!(),
+                };
+                shard.insert(self.key, QueryResult::Poisoned);
+                job
+            })
         };
         // Also signal the completion of the job, so waiters
         // will continue execution.
@@ -254,7 +229,6 @@ where
 
 #[cold]
 #[inline(never)]
-#[cfg(not(parallel_compiler))]
 fn cycle_error<Q, Qcx>(
     query: Q,
     qcx: Qcx,
@@ -298,7 +272,7 @@ where
 
     match result {
         Ok(()) => {
-            let Some((v, index)) = query.query_cache(qcx).lookup(&key) else {
+            let Some((v, index)) = query.look_up(qcx, &key) else {
                 cold_path(|| panic!("value must be in cache after waiting"))
             };
 
@@ -319,15 +293,43 @@ fn try_execute_query<Q, Qcx>(
     query: Q,
     qcx: Qcx,
     span: Span,
     key: Q::Key,
     dep_node: Option<DepNode<Qcx::DepKind>>,
 ) -> (Q::Value, Option<DepNodeIndex>)
+where
+    Q: QueryConfig<Qcx>,
+    Qcx: QueryContext,
+{
+    if qcx.single_thread() {
+        let state = query.query_state(qcx);
+        let state_lock = state.active.get_borrow_by_value(&key);
+        try_execute_query_inner::<Q, Qcx, SingleShard>(
+            query, qcx, span, key, dep_node, state_lock,
+        )
+    } else {
+        let state = query.query_state(qcx);
+        let state_lock = state.active.get_lock_by_value(&key);
+        try_execute_query_inner::<Q, Qcx, Sharded>(
+            query, qcx, span, key, dep_node, state_lock,
+        )
+    }
+}
+
+#[inline(always)]
+fn try_execute_query_inner<Q, Qcx, S: Shard>(
+    query: Q,
+    qcx: Qcx,
+    span: Span,
+    key: Q::Key,
+    dep_node: Option<DepNode<Qcx::DepKind>>,
+    state_lock: &<S::Impl<FxHashMap<Q::Key, QueryResult<Qcx::DepKind>>> as ShardImpl<
+        FxHashMap<Q::Key, QueryResult<Qcx::DepKind>>,
+    >>::Lock,
+) -> (Q::Value, Option<DepNodeIndex>)
 where
     Q: QueryConfig<Qcx>,
     Qcx: QueryContext,
 {
     let state = query.query_state(qcx);
-    #[cfg(parallel_compiler)]
-    let mut state_lock = state.active.get_shard_by_value(&key).lock();
-    #[cfg(not(parallel_compiler))]
-    let mut state_lock = state.active.lock();
+    let mut state_lock = state_lock.lock();
+    let lock = state_lock.deref_mut();
 
     // For the parallel compiler we need to check both the query cache and query state structures
     // while holding the state lock to ensure that 1) the query has not yet completed and 2) the
@@ -335,8 +337,8 @@ where
     // re-executing the query since `try_start` only checks that the query is not currently
     // executing, but another thread may have already completed the query and stores it result
     // in the query cache.
-    if cfg!(parallel_compiler) && qcx.dep_context().sess().threads() > 1 {
-        if let Some((value, index)) = query.query_cache(qcx).lookup(&key) {
+    if cfg!(parallel_compiler) && rustc_data_structures::sync::is_dyn_thread_safe() {
+        if let Some((value, index)) = query.look_up(qcx, &key) {
             qcx.dep_context().profiler().query_cache_hit(index.into());
             return (value, Some(index));
         }
@@ -344,7 +346,7 @@ where
 
     let current_job_id = qcx.current_query_job();
 
-    match state_lock.entry(key) {
+    match lock.entry(key) {
         Entry::Vacant(entry) => {
             // Nothing has computed or is computing the query, so we start a new job and insert it in the
             // state map.
@@ -370,6 +372,14 @@ where
         }
         #[cfg(parallel_compiler)]
         QueryResult::Started(job) => {
+            if std::intrinsics::likely(!rustc_data_structures::sync::is_dyn_thread_safe()) {
+                let id = job.id;
+                drop(state_lock);
+
+                // If we are single-threaded we know that we have cycle error,
+                // so we just return the error.
+                return cycle_error(query, qcx, id, span);
+            }
             // Get the latch out
             let latch = job.latch();
             drop(state_lock);
@@ -413,13 +423,12 @@ where
         execute_job_non_incr(query, qcx, key, id)
     };
 
-    let cache = query.query_cache(qcx);
     if query.feedable() {
         // We should not compute queries that also got a value via feeding.
         // This can't happen, as query feeding adds the very dependencies to the fed query
         // as its feeding query had. So if the fed query is red, so is its feeder, which will
         // get evaluated first, and re-feed the query.
-        if let Some((cached_result, _)) = cache.lookup(&key) {
+        if let Some((cached_result, _)) = query.look_up(qcx, &key) {
             let Some(hasher) = query.hash_result() else {
                 panic!(
                     "no_hash fed query later has its value computed.\n\
@@ -445,7 +454,8 @@ where
             );
         }
     }
-    job_owner.complete(cache, result, dep_node_index);
+
+    job_owner.complete(qcx, query, result, dep_node_index);
 
     (result, Some(dep_node_index))
 }
@@ -848,7 +858,7 @@ pub fn force_query<Q, Qcx>(
 {
     // We may be concurrently trying both execute and force a query.
     // Ensure that only one of them runs the query.
-    if let Some((_, index)) = query.query_cache(qcx).lookup(&key) {
+    if let Some((_, index)) = query.look_up(qcx, &key) {
         qcx.dep_context().profiler().query_cache_hit(index.into());
         return;
    }
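The single-thread fast path added to `try_execute_query` rests on a simple invariant: with one executor, re-entering a key whose state is `Started` can only mean a query cycle, so there is no latch to wait on. A toy model of that branch (hand-rolled types, not the patch's):

```rust
use std::collections::HashMap;

enum QueryResult {
    Started { job_id: u32 },
    Finished(i64),
}

// Toy model: re-entering a `Started` key in single-threaded mode must be
// a cycle, because we are the only executor.
fn try_execute(
    active: &mut HashMap<&'static str, QueryResult>,
    key: &'static str,
    single_thread: bool,
) -> Result<i64, String> {
    if let Some(result) = active.get(key) {
        return match result {
            QueryResult::Finished(v) => Ok(*v),
            QueryResult::Started { job_id } => {
                if single_thread {
                    // No other thread exists, so nobody will ever finish this job.
                    Err(format!("cycle detected while computing `{key}` (job {job_id})"))
                } else {
                    // The parallel path would block on the running job's latch here.
                    Err("would wait on the job's latch".to_string())
                }
            }
        };
    }
    active.insert(key, QueryResult::Started { job_id: 0 });
    let value = 42; // stand-in for invoking the query provider
    active.insert(key, QueryResult::Finished(value));
    Ok(value)
}

fn main() {
    let mut active = HashMap::new();
    assert_eq!(try_execute(&mut active, "type_of", true), Ok(42));
    active.insert("layout_of", QueryResult::Started { job_id: 1 });
    assert!(try_execute(&mut active, "layout_of", true).is_err());
}
```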
diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs
index c669b64dd2c81..fbc2d64534824 100644
--- a/compiler/rustc_span/src/hygiene.rs
+++ b/compiler/rustc_span/src/hygiene.rs
@@ -1278,11 +1278,8 @@ pub fn register_expn_id(
     let expn_id = ExpnId { krate, local_id };
     HygieneData::with(|hygiene_data| {
         let _old_data = hygiene_data.foreign_expn_data.insert(expn_id, data);
-        debug_assert!(_old_data.is_none());
         let _old_hash = hygiene_data.foreign_expn_hashes.insert(expn_id, hash);
-        debug_assert!(_old_hash.is_none());
         let _old_id = hygiene_data.expn_hash_to_expn_id.insert(hash, expn_id);
-        debug_assert!(_old_id.is_none());
     });
     expn_id
 }
diff --git a/src/bootstrap/config.rs b/src/bootstrap/config.rs
index 710c8b52194b4..e51fe601149db 100644
--- a/src/bootstrap/config.rs
+++ b/src/bootstrap/config.rs
@@ -1147,7 +1147,7 @@ impl Config {
             set(&mut config.use_lld, rust.use_lld);
             set(&mut config.lld_enabled, rust.lld);
             set(&mut config.llvm_tools_enabled, rust.llvm_tools);
-            config.rustc_parallel = rust.parallel_compiler.unwrap_or(false);
+            config.rustc_parallel = rust.parallel_compiler.unwrap_or(true);
             config.rustc_default_linker = rust.default_linker;
             config.musl_root = rust.musl_root.map(PathBuf::from);
             config.save_toolstates = rust.save_toolstates.map(PathBuf::from);
diff --git a/src/librustdoc/clean/utils.rs b/src/librustdoc/clean/utils.rs
index 17aa6b38e389c..d28b484cf2125 100644
--- a/src/librustdoc/clean/utils.rs
+++ b/src/librustdoc/clean/utils.rs
@@ -470,11 +470,7 @@ pub(crate) fn get_auto_trait_and_blanket_impls(
     cx: &mut DocContext<'_>,
     item_def_id: DefId,
 ) -> impl Iterator<Item = Item> {
-    // FIXME: To be removed once `parallel_compiler` bugs are fixed!
-    // More information in <https://github.com/rust-lang/rust/issues/106930>.
-    if cfg!(parallel_compiler) {
-        return vec![].into_iter().chain(vec![].into_iter());
-    }
+    // FIXME: More information in <https://github.com/rust-lang/rust/issues/106930>.
 
     let auto_impls = cx
         .sess()
diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs
index 5460bce21a512..d6964b9cf0709 100644
--- a/src/librustdoc/lib.rs
+++ b/src/librustdoc/lib.rs
@@ -212,8 +212,6 @@ fn init_logging() {
         .with_verbose_exit(true)
         .with_verbose_entry(true)
         .with_indent_amount(2);
-    #[cfg(all(parallel_compiler, debug_assertions))]
-    let layer = layer.with_thread_ids(true).with_thread_names(true);
 
     use tracing_subscriber::layer::SubscriberExt;
     let subscriber = tracing_subscriber::Registry::default().with(filter).with(layer);
@@ -740,7 +738,7 @@ fn main_args(at_args: &[String]) -> MainResult {
     };
 
     // Set parallel mode before error handler creation, which will create `Lock`s.
-    interface::set_thread_safe_mode(&options.unstable_opts);
+    interface::set_parallel_mode(&options.unstable_opts, &options.codegen_options);
 
     let diag = core::new_handler(
         options.error_format,
diff --git a/src/librustdoc/passes/lint/check_code_block_syntax.rs b/src/librustdoc/passes/lint/check_code_block_syntax.rs
index 8f873dbe50131..b74d3fe749d5e 100644
--- a/src/librustdoc/passes/lint/check_code_block_syntax.rs
+++ b/src/librustdoc/passes/lint/check_code_block_syntax.rs
@@ -1,5 +1,5 @@
 //! Validates syntax inside Rust code blocks (\`\`\`rust).
-use rustc_data_structures::sync::{Lock, Lrc};
+use rustc_data_structures::sync::Lrc;
 use rustc_errors::{
     emitter::Emitter,
     translation::{to_fluent_args, Translate},
@@ -10,6 +10,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::hygiene::{AstPass, ExpnData, ExpnKind, LocalExpnId};
 use rustc_span::source_map::{FilePathMapping, SourceMap};
 use rustc_span::{FileName, InnerSpan, DUMMY_SP};
+use std::sync::Mutex;
 
 use crate::clean;
 use crate::core::DocContext;
@@ -32,7 +33,7 @@ fn check_rust_syntax(
     dox: &str,
     code_block: RustCodeBlock,
 ) {
-    let buffer = Lrc::new(Lock::new(Buffer::default()));
+    let buffer = Lrc::new(Mutex::new(Buffer::default()));
     let fallback_bundle = rustc_errors::fallback_fluent_bundle(
         rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(),
         false,
@@ -60,7 +61,7 @@ fn check_rust_syntax(
             .is_empty()
     })
     .unwrap_or(false);
-    let buffer = buffer.borrow();
+    let buffer = buffer.lock().unwrap();
 
     if !buffer.has_errors && !is_empty {
         // No errors in a non-empty program.
@@ -138,7 +139,7 @@ struct Buffer {
 }
 
 struct BufferEmitter {
-    buffer: Lrc<Lock<Buffer>>,
+    buffer: Lrc<Mutex<Buffer>>,
     fallback_bundle: LazyFallbackBundle,
 }
 
@@ -154,7 +155,7 @@ impl Translate for BufferEmitter {
 
 impl Emitter for BufferEmitter {
     fn emit_diagnostic(&mut self, diag: &Diagnostic) {
-        let mut buffer = self.buffer.borrow_mut();
+        let mut buffer = self.buffer.lock().unwrap();
         let fluent_args = to_fluent_args(diag.args());
         let translated_main_message = self
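rustdoc's buffer emitter switches from rustc's `Lock` (a `RefCell` in non-parallel builds) to `std::sync::Mutex`, since the buffer may now be shared across threads. The sharing pattern, reduced to std types (illustrative; `Lrc` is rustc's `Arc` alias and the `Buffer` fields here are simplified):

```rust
use std::sync::{Arc, Mutex};

// The shared-buffer shape from `check_rust_syntax`, reduced to std types.
#[derive(Default)]
struct Buffer {
    messages: Vec<String>,
    has_errors: bool,
}

fn main() {
    let buffer = Arc::new(Mutex::new(Buffer::default()));

    // The emitter half: what `BufferEmitter::emit_diagnostic` does, with
    // `lock().unwrap()` where the old code used `borrow_mut()`.
    let emitter_buffer = Arc::clone(&buffer);
    {
        let mut buf = emitter_buffer.lock().unwrap();
        buf.messages.push("error: expected `;`".to_string());
        buf.has_errors = true;
    }

    // The reader half: inspect the buffer after parsing finished.
    let buf = buffer.lock().unwrap();
    assert!(buf.has_errors);
    assert_eq!(buf.messages.len(), 1);
}
```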
diff --git a/src/tools/clippy/clippy_utils/src/lib.rs b/src/tools/clippy/clippy_utils/src/lib.rs
index 964104fc31d0e..f8e559deee8b7 100644
--- a/src/tools/clippy/clippy_utils/src/lib.rs
+++ b/src/tools/clippy/clippy_utils/src/lib.rs
@@ -2,6 +2,7 @@
 #![feature(box_patterns)]
 #![feature(let_chains)]
 #![feature(lint_reasons)]
+#![feature(negative_impls)]
 #![feature(never_type)]
 #![feature(rustc_private)]
 #![recursion_limit = "512"]
diff --git a/src/tools/clippy/clippy_utils/src/visitors.rs b/src/tools/clippy/clippy_utils/src/visitors.rs
index 5dcd71cef127e..cb97f95995cf4 100644
--- a/src/tools/clippy/clippy_utils/src/visitors.rs
+++ b/src/tools/clippy/clippy_utils/src/visitors.rs
@@ -113,6 +113,10 @@ pub fn for_each_expr_with_closures<'tcx, B, C: Continue>(
         f: F,
         res: Option<B>,
     }
+    // FIXME: more details in #106930
+    impl<'tcx, B, F> !Send for V<'tcx, B, F> {}
+    impl<'tcx, B, F> !Sync for V<'tcx, B, F> {}
+
     impl<'tcx, B, C: Continue, F: FnMut(&'tcx Expr<'tcx>) -> ControlFlow<B, C>> Visitor<'tcx> for V<'tcx, B, F> {
         type NestedFilter = nested_filter::OnlyBodies;
         fn nested_visit_map(&mut self) -> Self::Map {
@@ -513,6 +517,10 @@ pub fn for_each_local_use_after_expr<'tcx, B>(
         res: ControlFlow<B>,
         f: F,
     }
+    // FIXME: more details in #106930
+    impl<'cx, 'tcx, B, F> !Send for V<'cx, 'tcx, B, F> {}
+    impl<'cx, 'tcx, B, F> !Sync for V<'cx, 'tcx, B, F> {}
+
     impl<'cx, 'tcx, F: FnMut(&'tcx Expr<'tcx>) -> ControlFlow<B>, B> Visitor<'tcx> for V<'cx, 'tcx, F, B> {
         type NestedFilter = nested_filter::OnlyBodies;
         fn nested_visit_map(&mut self) -> Self::Map {
@@ -694,6 +702,10 @@ pub fn for_each_local_assignment<'tcx, B>(
         res: ControlFlow<B>,
         f: F,
     }
+    // FIXME: more details in #106930
+    impl<'cx, 'tcx, F, B> !Send for V<'cx, 'tcx, F, B> {}
+    impl<'cx, 'tcx, F, B> !Sync for V<'cx, 'tcx, F, B> {}
+
     impl<'cx, 'tcx, F: FnMut(&'tcx Expr<'tcx>) -> ControlFlow<B>, B> Visitor<'tcx> for V<'cx, 'tcx, F, B> {
         type NestedFilter = nested_filter::OnlyBodies;
         fn nested_visit_map(&mut self) -> Self::Map {
diff --git a/tests/ui-fulldeps/missing-rustc-driver-error.stderr b/tests/ui-fulldeps/missing-rustc-driver-error.stderr
index ad03ba0103c52..2553eaf208d81 100644
--- a/tests/ui-fulldeps/missing-rustc-driver-error.stderr
+++ b/tests/ui-fulldeps/missing-rustc-driver-error.stderr
@@ -20,5 +20,25 @@ error: crate `cfg_if` required to be available in rlib format, but was not found
 
 error: crate `libc` required to be available in rlib format, but was not found in this form
 
-error: aborting due to 10 previous errors
+error: crate `rayon` required to be available in rlib format, but was not found in this form
+
+error: crate `either` required to be available in rlib format, but was not found in this form
+
+error: crate `rayon_core` required to be available in rlib format, but was not found in this form
+
+error: crate `crossbeam_channel` required to be available in rlib format, but was not found in this form
+
+error: crate `crossbeam_utils` required to be available in rlib format, but was not found in this form
+
+error: crate `crossbeam_deque` required to be available in rlib format, but was not found in this form
+
+error: crate `crossbeam_epoch` required to be available in rlib format, but was not found in this form
+
+error: crate `scopeguard` required to be available in rlib format, but was not found in this form
+
+error: crate `memoffset` required to be available in rlib format, but was not found in this form
+
+error: crate `num_cpus` required to be available in rlib format, but was not found in this form
+
+error: aborting due to 20 previous errors