diff --git a/Cargo.lock b/Cargo.lock index df2842bddb386..b91e8d85316b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2983,6 +2983,15 @@ dependencies = [ "getrandom 0.3.3", ] +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core 0.9.3", +] + [[package]] name = "rand_xoshiro" version = "0.7.0" @@ -3182,16 +3191,6 @@ dependencies = [ "tikv-jemalloc-sys", ] -[[package]] -name = "rustc-rayon-core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f42932dcd3bcbe484b38a3ccf79b7906fac41c02d408b5b1bac26da3416efdb" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - [[package]] name = "rustc-semver" version = "1.1.0" @@ -3560,7 +3559,6 @@ dependencies = [ "parking_lot", "portable-atomic", "rustc-hash 2.1.1", - "rustc-rayon-core", "rustc-stable-hash", "rustc_arena", "rustc_graphviz", @@ -3568,6 +3566,7 @@ dependencies = [ "rustc_index", "rustc_macros", "rustc_serialize", + "rustc_thread_pool", "smallvec", "stacker", "tempfile", @@ -3915,7 +3914,6 @@ dependencies = [ name = "rustc_interface" version = "0.0.0" dependencies = [ - "rustc-rayon-core", "rustc_abi", "rustc_ast", "rustc_ast_lowering", @@ -3954,6 +3952,7 @@ dependencies = [ "rustc_span", "rustc_symbol_mangling", "rustc_target", + "rustc_thread_pool", "rustc_trait_selection", "rustc_traits", "rustc_ty_utils", @@ -4081,7 +4080,6 @@ dependencies = [ "either", "gsgdt", "polonius-engine", - "rustc-rayon-core", "rustc_abi", "rustc_apfloat", "rustc_arena", @@ -4105,6 +4103,7 @@ dependencies = [ "rustc_session", "rustc_span", "rustc_target", + "rustc_thread_pool", "rustc_type_ir", "smallvec", "thin-vec", @@ -4351,7 +4350,6 @@ version = "0.0.0" dependencies = [ "hashbrown", "parking_lot", - "rustc-rayon-core", "rustc_abi", "rustc_ast", "rustc_attr_data_structures", @@ -4366,6 +4364,7 @@ dependencies = [ "rustc_serialize", "rustc_session", "rustc_span", + "rustc_thread_pool", "smallvec", "tracing", ] @@ -4527,6 +4526,18 @@ dependencies = [ "tracing", ] +[[package]] +name = "rustc_thread_pool" +version = "0.0.0" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", + "libc", + "rand 0.9.1", + "rand_xorshift", + "scoped-tls", +] + [[package]] name = "rustc_tools_util" version = "0.4.2" diff --git a/Cargo.toml b/Cargo.toml index c4d2a06f4cb17..e08f14d210167 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,7 +60,7 @@ exclude = [ "obj", ] -[profile.release.package.rustc-rayon-core] +[profile.release.package.rustc_thread_pool] # The rustc fork of Rayon has deadlock detection code which intermittently # causes overflows in the CI (see https://github.com/rust-lang/rust/issues/90227) # so we turn overflow checks off for now. 
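Aside (illustrative, hedged): the hunks above swap the external `rustc-rayon-core` crate for an in-tree `rustc_thread_pool` crate. Assuming the fork keeps rayon-core's top-level API (`join`, `scope`, `spawn`, `broadcast`, as the later hunks in this patch show), call sites only change the crate path; a minimal sketch:

```rust
// Minimal sketch, not part of the patch. Assumes `rustc_thread_pool` is
// available as a path dependency and keeps rayon-core's `join` signature.
fn parallel_sum(v: &[u64]) -> u64 {
    if v.len() <= 1024 {
        // Small inputs: no point splitting further.
        return v.iter().sum();
    }
    let (lo, hi) = v.split_at(v.len() / 2);
    // Same semantics as `rayon_core::join`: the second closure is advertised
    // for stealing while the first runs on the current thread.
    let (a, b) = rustc_thread_pool::join(|| parallel_sum(lo), || parallel_sum(hi));
    a + b
}
```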
diff --git a/compiler/rustc_ast_lowering/messages.ftl b/compiler/rustc_ast_lowering/messages.ftl index 50eb7c7ae99bd..c6472fd45fa98 100644 --- a/compiler/rustc_ast_lowering/messages.ftl +++ b/compiler/rustc_ast_lowering/messages.ftl @@ -172,9 +172,6 @@ ast_lowering_template_modifier = template modifier ast_lowering_this_not_async = this is not `async` -ast_lowering_underscore_array_length_unstable = - using `_` for array lengths is unstable - ast_lowering_underscore_expr_lhs_assign = in expressions, `_` can only be used on the left-hand side of an assignment .label = `_` not allowed here diff --git a/compiler/rustc_ast_lowering/src/lib.rs b/compiler/rustc_ast_lowering/src/lib.rs index 3b99a653417aa..e74fd1db15b96 100644 --- a/compiler/rustc_ast_lowering/src/lib.rs +++ b/compiler/rustc_ast_lowering/src/lib.rs @@ -48,7 +48,7 @@ use rustc_data_structures::sorted_map::SortedMap; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::sync::spawn; use rustc_data_structures::tagged_ptr::TaggedRef; -use rustc_errors::{DiagArgFromDisplay, DiagCtxtHandle, StashKey}; +use rustc_errors::{DiagArgFromDisplay, DiagCtxtHandle}; use rustc_hir::def::{DefKind, LifetimeRes, Namespace, PartialRes, PerNS, Res}; use rustc_hir::def_id::{CRATE_DEF_ID, LOCAL_CRATE, LocalDefId}; use rustc_hir::lints::DelayedLint; @@ -60,7 +60,7 @@ use rustc_index::{Idx, IndexSlice, IndexVec}; use rustc_macros::extension; use rustc_middle::span_bug; use rustc_middle::ty::{ResolverAstLowering, TyCtxt}; -use rustc_session::parse::{add_feature_diagnostics, feature_err}; +use rustc_session::parse::add_feature_diagnostics; use rustc_span::symbol::{Ident, Symbol, kw, sym}; use rustc_span::{DUMMY_SP, DesugaringKind, Span}; use smallvec::SmallVec; @@ -2109,15 +2109,6 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> { // `ExprKind::Paren(ExprKind::Underscore)` and should also be lowered to `GenericArg::Infer` match c.value.peel_parens().kind { ExprKind::Underscore => { - if !self.tcx.features().generic_arg_infer() { - feature_err( - &self.tcx.sess, - sym::generic_arg_infer, - c.value.span, - fluent_generated::ast_lowering_underscore_array_length_unstable, - ) - .stash(c.value.span, StashKey::UnderscoreForArrayLengths); - } let ct_kind = hir::ConstArgKind::Infer(self.lower_span(c.value.span), ()); self.arena.alloc(hir::ConstArg { hir_id: self.lower_node_id(c.id), kind: ct_kind }) } diff --git a/compiler/rustc_data_structures/Cargo.toml b/compiler/rustc_data_structures/Cargo.toml index f6a0201161851..17204883fb033 100644 --- a/compiler/rustc_data_structures/Cargo.toml +++ b/compiler/rustc_data_structures/Cargo.toml @@ -14,7 +14,6 @@ indexmap = "2.4.0" jobserver_crate = { version = "0.1.28", package = "jobserver" } measureme = "12.0.1" rustc-hash = "2.0.0" -rustc-rayon-core = { version = "0.5.0" } rustc-stable-hash = { version = "0.1.0", features = ["nightly"] } rustc_arena = { path = "../rustc_arena" } rustc_graphviz = { path = "../rustc_graphviz" } @@ -22,6 +21,7 @@ rustc_hashes = { path = "../rustc_hashes" } rustc_index = { path = "../rustc_index", package = "rustc_index" } rustc_macros = { path = "../rustc_macros" } rustc_serialize = { path = "../rustc_serialize" } +rustc_thread_pool = { path = "../rustc_thread_pool" } smallvec = { version = "1.8.1", features = ["const_generics", "union", "may_dangle"] } stacker = "0.1.17" tempfile = "3.2" diff --git a/compiler/rustc_data_structures/src/sync.rs b/compiler/rustc_data_structures/src/sync.rs index b28c333d860c6..3881f3c2aa841 100644 --- 
a/compiler/rustc_data_structures/src/sync.rs +++ b/compiler/rustc_data_structures/src/sync.rs @@ -22,8 +22,6 @@ //! | | | `parking_lot::Mutex` | //! | `RwLock` | `RefCell` | `parking_lot::RwLock` | //! | `MTLock` [^1] | `T` | `Lock` | -//! | | | | -//! | `ParallelIterator` | `Iterator` | `rayon::iter::ParallelIterator` | //! //! [^1]: `MTLock` is similar to `Lock`, but the serial version avoids the cost //! of a `RefCell`. This is appropriate when interior mutability is not diff --git a/compiler/rustc_data_structures/src/sync/parallel.rs b/compiler/rustc_data_structures/src/sync/parallel.rs index ab65c7f3a6b5f..ff4b60a1031b9 100644 --- a/compiler/rustc_data_structures/src/sync/parallel.rs +++ b/compiler/rustc_data_structures/src/sync/parallel.rs @@ -96,7 +96,7 @@ macro_rules! parallel { pub fn spawn(func: impl FnOnce() + DynSend + 'static) { if mode::is_dyn_thread_safe() { let func = FromDyn::from(func); - rayon_core::spawn(|| { + rustc_thread_pool::spawn(|| { (func.into_inner())(); }); } else { @@ -107,11 +107,11 @@ pub fn spawn(func: impl FnOnce() + DynSend + 'static) { // This function only works when `mode::is_dyn_thread_safe()`. pub fn scope<'scope, OP, R>(op: OP) -> R where - OP: FnOnce(&rayon_core::Scope<'scope>) -> R + DynSend, + OP: FnOnce(&rustc_thread_pool::Scope<'scope>) -> R + DynSend, R: DynSend, { let op = FromDyn::from(op); - rayon_core::scope(|s| FromDyn::from(op.into_inner()(s))).into_inner() + rustc_thread_pool::scope(|s| FromDyn::from(op.into_inner()(s))).into_inner() } #[inline] @@ -124,7 +124,7 @@ where let oper_a = FromDyn::from(oper_a); let oper_b = FromDyn::from(oper_b); let (a, b) = parallel_guard(|guard| { - rayon_core::join( + rustc_thread_pool::join( move || guard.run(move || FromDyn::from(oper_a.into_inner()())), move || guard.run(move || FromDyn::from(oper_b.into_inner()())), ) @@ -158,7 +158,7 @@ fn par_slice( let (left, right) = items.split_at_mut(items.len() / 2); let mut left = state.for_each.derive(left); let mut right = state.for_each.derive(right); - rayon_core::join(move || par_rec(*left, state), move || par_rec(*right, state)); + rustc_thread_pool::join(move || par_rec(*left, state), move || par_rec(*right, state)); } } @@ -241,7 +241,7 @@ pub fn par_map, R: DynSend, C: FromIterato pub fn broadcast(op: impl Fn(usize) -> R + DynSync) -> Vec { if mode::is_dyn_thread_safe() { let op = FromDyn::from(op); - let results = rayon_core::broadcast(|context| op.derive(op(context.index()))); + let results = rustc_thread_pool::broadcast(|context| op.derive(op(context.index()))); results.into_iter().map(|r| r.into_inner()).collect() } else { vec![op(0)] diff --git a/compiler/rustc_feature/src/accepted.rs b/compiler/rustc_feature/src/accepted.rs index b1c185220f45b..cfe0f4e5d6cb6 100644 --- a/compiler/rustc_feature/src/accepted.rs +++ b/compiler/rustc_feature/src/accepted.rs @@ -220,6 +220,8 @@ declare_features! ( (accepted, fn_must_use, "1.27.0", Some(43302)), /// Allows capturing variables in scope using format_args! (accepted, format_args_capture, "1.58.0", Some(67984)), + /// Infer generic args for both consts and types. + (accepted, generic_arg_infer, "CURRENT_RUSTC_VERSION", Some(85077)), /// Allows associated types to be generic, e.g., `type Foo;` (RFC 1598). (accepted, generic_associated_types, "1.65.0", Some(44265)), /// Allows attributes on lifetime/type formal parameters in generics (RFC 1327). 
diff --git a/compiler/rustc_feature/src/unstable.rs b/compiler/rustc_feature/src/unstable.rs index 5e42b919f9d9c..a7bc6207149f5 100644 --- a/compiler/rustc_feature/src/unstable.rs +++ b/compiler/rustc_feature/src/unstable.rs @@ -520,8 +520,6 @@ declare_features! ( (unstable, frontmatter, "1.88.0", Some(136889)), /// Allows defining gen blocks and `gen fn`. (unstable, gen_blocks, "1.75.0", Some(117078)), - /// Infer generic args for both consts and types. - (unstable, generic_arg_infer, "1.55.0", Some(85077)), /// Allows non-trivial generic constants which have to have wfness manually propagated to callers (incomplete, generic_const_exprs, "1.56.0", Some(76560)), /// Allows generic parameters and where-clauses on free & associated const items. diff --git a/compiler/rustc_hir_analysis/src/collect/type_of.rs b/compiler/rustc_hir_analysis/src/collect/type_of.rs index 141d96b57e579..902a2e15dffde 100644 --- a/compiler/rustc_hir_analysis/src/collect/type_of.rs +++ b/compiler/rustc_hir_analysis/src/collect/type_of.rs @@ -452,13 +452,6 @@ fn infer_placeholder_type<'tcx>( if let Some(ty) = node.ty() { visitor.visit_ty_unambig(ty); } - // If we have just one span, let's try to steal a const `_` feature error. - let try_steal_span = if !tcx.features().generic_arg_infer() && visitor.spans.len() == 1 - { - visitor.spans.first().copied() - } else { - None - }; // If we didn't find any infer tys, then just fallback to `span`. if visitor.spans.is_empty() { visitor.spans.push(span); @@ -489,15 +482,7 @@ fn infer_placeholder_type<'tcx>( } } - if let Some(try_steal_span) = try_steal_span { - cx.dcx().try_steal_replace_and_emit_err( - try_steal_span, - StashKey::UnderscoreForArrayLengths, - diag, - ) - } else { - diag.emit() - } + diag.emit() }); Ty::new_error(tcx, guar) } diff --git a/compiler/rustc_hir_analysis/src/hir_ty_lowering/generics.rs b/compiler/rustc_hir_analysis/src/hir_ty_lowering/generics.rs index 3a26b8331f8b3..8c7c3750865cf 100644 --- a/compiler/rustc_hir_analysis/src/hir_ty_lowering/generics.rs +++ b/compiler/rustc_hir_analysis/src/hir_ty_lowering/generics.rs @@ -8,7 +8,7 @@ use rustc_middle::ty::{ self, GenericArgsRef, GenericParamDef, GenericParamDefKind, IsSuggestable, Ty, }; use rustc_session::lint::builtin::LATE_BOUND_LIFETIME_ARGUMENTS; -use rustc_span::{kw, sym}; +use rustc_span::kw; use smallvec::SmallVec; use tracing::{debug, instrument}; @@ -258,19 +258,6 @@ pub fn lower_generic_args<'tcx: 'a, 'a>( GenericParamDefKind::Const { .. }, _, ) => { - if let GenericParamDefKind::Const { .. 
} = param.kind - && let GenericArg::Infer(inf) = arg - && !tcx.features().generic_arg_infer() - { - rustc_session::parse::feature_err( - tcx.sess, - sym::generic_arg_infer, - inf.span, - "const arguments cannot yet be inferred with `_`", - ) - .emit(); - } - // We lower to an infer even when the feature gate is not enabled // as it is useful for diagnostics to be able to see a `ConstKind::Infer` args.push(ctx.provided_kind(&args, param, arg)); diff --git a/compiler/rustc_interface/Cargo.toml b/compiler/rustc_interface/Cargo.toml index ff28dbeaee698..a72a795878786 100644 --- a/compiler/rustc_interface/Cargo.toml +++ b/compiler/rustc_interface/Cargo.toml @@ -5,7 +5,6 @@ edition = "2024" [dependencies] # tidy-alphabetical-start -rustc-rayon-core = { version = "0.5.0" } rustc_ast = { path = "../rustc_ast" } rustc_ast_lowering = { path = "../rustc_ast_lowering" } rustc_ast_passes = { path = "../rustc_ast_passes" } @@ -43,6 +42,7 @@ rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } rustc_symbol_mangling = { path = "../rustc_symbol_mangling" } rustc_target = { path = "../rustc_target" } +rustc_thread_pool = { path = "../rustc_thread_pool" } rustc_trait_selection = { path = "../rustc_trait_selection" } rustc_traits = { path = "../rustc_traits" } rustc_ty_utils = { path = "../rustc_ty_utils" } diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index 82823581c1210..a0012b04c4f29 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -802,6 +802,7 @@ fn test_unstable_options_tracking_hash() { tracked!(force_unstable_if_unmarked, true); tracked!(function_return, FunctionReturn::ThunkExtern); tracked!(function_sections, Some(false)); + tracked!(hint_mostly_unused, true); tracked!(human_readable_cgu_names, true); tracked!(incremental_ignore_spans, true); tracked!(inline_mir, Some(true)); diff --git a/compiler/rustc_interface/src/util.rs b/compiler/rustc_interface/src/util.rs index 8bdc24d47d98a..8a7d61172655f 100644 --- a/compiler/rustc_interface/src/util.rs +++ b/compiler/rustc_interface/src/util.rs @@ -208,7 +208,7 @@ pub(crate) fn run_in_thread_pool_with_globals< let proxy_ = Arc::clone(&proxy); let proxy__ = Arc::clone(&proxy); - let builder = rayon_core::ThreadPoolBuilder::new() + let builder = rustc_thread_pool::ThreadPoolBuilder::new() .thread_name(|_| "rustc".to_string()) .acquire_thread_handler(move || proxy_.acquire_thread()) .release_thread_handler(move || proxy__.release_thread()) @@ -218,7 +218,7 @@ pub(crate) fn run_in_thread_pool_with_globals< // locals to it. The new thread runs the deadlock handler. let current_gcx2 = current_gcx2.clone(); - let registry = rayon_core::Registry::current(); + let registry = rustc_thread_pool::Registry::current(); let session_globals = rustc_span::with_session_globals(|session_globals| { session_globals as *const SessionGlobals as usize }); @@ -265,7 +265,7 @@ pub(crate) fn run_in_thread_pool_with_globals< builder .build_scoped( // Initialize each new worker thread when created. - move |thread: rayon_core::ThreadBuilder| { + move |thread: rustc_thread_pool::ThreadBuilder| { // Register the thread for use with the `WorkerLocal` type. registry.register(); @@ -274,7 +274,7 @@ pub(crate) fn run_in_thread_pool_with_globals< }) }, // Run `f` on the first thread in the thread pool. 
- move |pool: &rayon_core::ThreadPool| { + move |pool: &rustc_thread_pool::ThreadPool| { pool.install(|| f(current_gcx.into_inner(), proxy)) }, ) diff --git a/compiler/rustc_middle/Cargo.toml b/compiler/rustc_middle/Cargo.toml index 43c1af642dd56..edd0af6e4f535 100644 --- a/compiler/rustc_middle/Cargo.toml +++ b/compiler/rustc_middle/Cargo.toml @@ -9,7 +9,6 @@ bitflags = "2.4.1" either = "1.5.0" gsgdt = "0.1.2" polonius-engine = "0.13.0" -rustc-rayon-core = { version = "0.5.0" } rustc_abi = { path = "../rustc_abi" } rustc_apfloat = "0.2.0" rustc_arena = { path = "../rustc_arena" } @@ -33,6 +32,7 @@ rustc_serialize = { path = "../rustc_serialize" } rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } rustc_target = { path = "../rustc_target" } +rustc_thread_pool = { path = "../rustc_thread_pool" } rustc_type_ir = { path = "../rustc_type_ir" } smallvec = { version = "1.8.1", features = ["union", "may_dangle"] } thin-vec = "0.2.12" diff --git a/compiler/rustc_middle/src/ty/context/tls.rs b/compiler/rustc_middle/src/ty/context/tls.rs index 5fc80bc793673..fa9995898ac20 100644 --- a/compiler/rustc_middle/src/ty/context/tls.rs +++ b/compiler/rustc_middle/src/ty/context/tls.rs @@ -36,7 +36,7 @@ impl<'a, 'tcx> ImplicitCtxt<'a, 'tcx> { } // Import the thread-local variable from Rayon, which is preserved for Rayon jobs. -use rayon_core::tlv::TLV; +use rustc_thread_pool::tlv::TLV; #[inline] fn erase(context: &ImplicitCtxt<'_, '_>) -> *const () { diff --git a/compiler/rustc_mir_transform/src/copy_prop.rs b/compiler/rustc_mir_transform/src/copy_prop.rs index 27af5818982d0..fe78a104fa0b6 100644 --- a/compiler/rustc_mir_transform/src/copy_prop.rs +++ b/compiler/rustc_mir_transform/src/copy_prop.rs @@ -30,6 +30,8 @@ impl<'tcx> crate::MirPass<'tcx> for CopyProp { let typing_env = body.typing_env(tcx); let ssa = SsaLocals::new(tcx, body, typing_env); + debug!(borrowed_locals = ?ssa.borrowed_locals()); + debug!(copy_classes = ?ssa.copy_classes()); let fully_moved = fully_moved_locals(&ssa, body); debug!(?fully_moved); @@ -43,14 +45,8 @@ impl<'tcx> crate::MirPass<'tcx> for CopyProp { let any_replacement = ssa.copy_classes().iter_enumerated().any(|(l, &h)| l != h); - Replacer { - tcx, - copy_classes: ssa.copy_classes(), - fully_moved, - borrowed_locals: ssa.borrowed_locals(), - storage_to_remove, - } - .visit_body_preserves_cfg(body); + Replacer { tcx, copy_classes: ssa.copy_classes(), fully_moved, storage_to_remove } + .visit_body_preserves_cfg(body); if any_replacement { crate::simplify::remove_unused_definitions(body); @@ -102,7 +98,6 @@ struct Replacer<'a, 'tcx> { tcx: TyCtxt<'tcx>, fully_moved: DenseBitSet, storage_to_remove: DenseBitSet, - borrowed_locals: &'a DenseBitSet, copy_classes: &'a IndexSlice, } @@ -111,34 +106,18 @@ impl<'tcx> MutVisitor<'tcx> for Replacer<'_, 'tcx> { self.tcx } + #[tracing::instrument(level = "trace", skip(self))] fn visit_local(&mut self, local: &mut Local, ctxt: PlaceContext, _: Location) { let new_local = self.copy_classes[*local]; - // We must not unify two locals that are borrowed. But this is fine if one is borrowed and - // the other is not. We chose to check the original local, and not the target. That way, if - // the original local is borrowed and the target is not, we do not pessimize the whole class. - if self.borrowed_locals.contains(*local) { - return; - } match ctxt { // Do not modify the local in storage statements. 
PlaceContext::NonUse(NonUseContext::StorageLive | NonUseContext::StorageDead) => {} - // The local should have been marked as non-SSA. - PlaceContext::MutatingUse(_) => assert_eq!(*local, new_local), // We access the value. _ => *local = new_local, } } - fn visit_place(&mut self, place: &mut Place<'tcx>, _: PlaceContext, loc: Location) { - if let Some(new_projection) = self.process_projection(place.projection, loc) { - place.projection = self.tcx().mk_place_elems(&new_projection); - } - - // Any non-mutating use context is ok. - let ctxt = PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy); - self.visit_local(&mut place.local, ctxt, loc) - } - + #[tracing::instrument(level = "trace", skip(self))] fn visit_operand(&mut self, operand: &mut Operand<'tcx>, loc: Location) { if let Operand::Move(place) = *operand // A move out of a projection of a copy is equivalent to a copy of the original @@ -151,6 +130,7 @@ impl<'tcx> MutVisitor<'tcx> for Replacer<'_, 'tcx> { self.super_operand(operand, loc); } + #[tracing::instrument(level = "trace", skip(self))] fn visit_statement(&mut self, stmt: &mut Statement<'tcx>, loc: Location) { // When removing storage statements, we need to remove both (#107511). if let StatementKind::StorageLive(l) | StatementKind::StorageDead(l) = stmt.kind diff --git a/compiler/rustc_mir_transform/src/cross_crate_inline.rs b/compiler/rustc_mir_transform/src/cross_crate_inline.rs index 727d4a126d21e..6d7b7e10ef697 100644 --- a/compiler/rustc_mir_transform/src/cross_crate_inline.rs +++ b/compiler/rustc_mir_transform/src/cross_crate_inline.rs @@ -50,6 +50,13 @@ fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool { _ => {} } + // If the crate is likely to be mostly unused, use cross-crate inlining to defer codegen until + // the function is referenced, in order to skip codegen for unused functions. This is + // intentionally after the check for `inline(never)`, so that `inline(never)` wins. + if tcx.sess.opts.unstable_opts.hint_mostly_unused { + return true; + } + let sig = tcx.fn_sig(def_id).instantiate_identity(); for ty in sig.inputs().skip_binder().iter().chain(std::iter::once(&sig.output().skip_binder())) { diff --git a/compiler/rustc_mir_transform/src/ssa.rs b/compiler/rustc_mir_transform/src/ssa.rs index edd0cabca49a4..03b6f9b7ff3b8 100644 --- a/compiler/rustc_mir_transform/src/ssa.rs +++ b/compiler/rustc_mir_transform/src/ssa.rs @@ -293,6 +293,10 @@ impl<'tcx> Visitor<'tcx> for SsaVisitor<'_, 'tcx> { fn compute_copy_classes(ssa: &mut SsaLocals, body: &Body<'_>) { let mut direct_uses = std::mem::take(&mut ssa.direct_uses); let mut copies = IndexVec::from_fn_n(|l| l, body.local_decls.len()); + // We must not unify two locals that are borrowed. But this is fine if one is borrowed and + // the other is not. This bitset is keyed by *class head* and contains whether any member of + // the class is borrowed. + let mut borrowed_classes = ssa.borrowed_locals().clone(); for (local, rvalue, _) in ssa.assignments(body) { let (Rvalue::Use(Operand::Copy(place) | Operand::Move(place)) @@ -318,6 +322,11 @@ fn compute_copy_classes(ssa: &mut SsaLocals, body: &Body<'_>) { // visited before `local`, and we just have to copy the representing local. let head = copies[rhs]; + // Do not unify two borrowed locals. + if borrowed_classes.contains(local) && borrowed_classes.contains(head) { + continue; + } + if local == RETURN_PLACE { // `_0` is special, we cannot rename it. Instead, rename the class of `rhs` to // `RETURN_PLACE`. 
This is only possible if the class head is a temporary, not an @@ -330,14 +339,21 @@ fn compute_copy_classes(ssa: &mut SsaLocals, body: &Body<'_>) { *h = RETURN_PLACE; } } + if borrowed_classes.contains(head) { + borrowed_classes.insert(RETURN_PLACE); + } } else { copies[local] = head; + if borrowed_classes.contains(local) { + borrowed_classes.insert(head); + } } direct_uses[rhs] -= 1; } debug!(?copies); debug!(?direct_uses); + debug!(?borrowed_classes); // Invariant: `copies` must point to the head of an equivalence class. #[cfg(debug_assertions)] @@ -346,6 +362,13 @@ fn compute_copy_classes(ssa: &mut SsaLocals, body: &Body<'_>) { } debug_assert_eq!(copies[RETURN_PLACE], RETURN_PLACE); + // Invariant: `borrowed_classes` must be true if any member of the class is borrowed. + #[cfg(debug_assertions)] + for &head in copies.iter() { + let any_borrowed = ssa.borrowed_locals.iter().any(|l| copies[l] == head); + assert_eq!(borrowed_classes.contains(head), any_borrowed); + } + ssa.direct_uses = direct_uses; ssa.copy_classes = copies; } diff --git a/compiler/rustc_query_system/Cargo.toml b/compiler/rustc_query_system/Cargo.toml index 7db06953aeb64..3d2d879a76445 100644 --- a/compiler/rustc_query_system/Cargo.toml +++ b/compiler/rustc_query_system/Cargo.toml @@ -6,7 +6,6 @@ edition = "2024" [dependencies] # tidy-alphabetical-start parking_lot = "0.12" -rustc-rayon-core = { version = "0.5.0" } rustc_abi = { path = "../rustc_abi" } rustc_ast = { path = "../rustc_ast" } rustc_attr_data_structures = { path = "../rustc_attr_data_structures" } @@ -21,6 +20,7 @@ rustc_macros = { path = "../rustc_macros" } rustc_serialize = { path = "../rustc_serialize" } rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } +rustc_thread_pool = { path = "../rustc_thread_pool" } smallvec = { version = "1.8.1", features = ["union", "may_dangle"] } tracing = "0.1" # tidy-alphabetical-end diff --git a/compiler/rustc_query_system/src/query/job.rs b/compiler/rustc_query_system/src/query/job.rs index 1e79bd461d2a4..7e61f5026da4a 100644 --- a/compiler/rustc_query_system/src/query/job.rs +++ b/compiler/rustc_query_system/src/query/job.rs @@ -236,7 +236,7 @@ impl QueryLatch { // If this detects a deadlock and the deadlock handler wants to resume this thread // we have to be in the `wait` call. This is ensured by the deadlock handler // getting the self.info lock. - rayon_core::mark_blocked(); + rustc_thread_pool::mark_blocked(); let proxy = qcx.jobserver_proxy(); proxy.release_thread(); waiter.condvar.wait(&mut info); @@ -251,9 +251,9 @@ impl QueryLatch { let mut info = self.info.lock(); debug_assert!(!info.complete); info.complete = true; - let registry = rayon_core::Registry::current(); + let registry = rustc_thread_pool::Registry::current(); for waiter in info.waiters.drain(..) { - rayon_core::mark_unblocked(®istry); + rustc_thread_pool::mark_unblocked(®istry); waiter.condvar.notify_one(); } } @@ -507,7 +507,7 @@ fn remove_cycle( /// all active queries for cycles before finally resuming all the waiters at once. pub fn break_query_cycles( query_map: QueryMap, - registry: &rayon_core::Registry, + registry: &rustc_thread_pool::Registry, ) { let mut wakelist = Vec::new(); // It is OK per the comments: @@ -543,7 +543,7 @@ pub fn break_query_cycles( // we wake the threads up as otherwise Rayon could detect a deadlock if a thread we // resumed fell asleep and this thread had yet to mark the remaining threads as unblocked. 
for _ in 0..wakelist.len() { - rayon_core::mark_unblocked(registry); + rustc_thread_pool::mark_unblocked(registry); } for waiter in wakelist.into_iter() { diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 9ca405333f43d..0ac2702cac56a 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -2235,6 +2235,8 @@ options! { environment variable `RUSTC_GRAPHVIZ_FONT` (default: `Courier, monospace`)"), has_thread_local: Option = (None, parse_opt_bool, [TRACKED], "explicitly enable the `cfg(target_thread_local)` directive"), + hint_mostly_unused: bool = (false, parse_bool, [TRACKED], + "hint that most of this crate will go unused, to minimize work for uncalled functions"), human_readable_cgu_names: bool = (false, parse_bool, [TRACKED], "generate human-readable, predictable names for codegen units (default: no)"), identify_regions: bool = (false, parse_bool, [UNTRACKED], diff --git a/compiler/rustc_thread_pool/Cargo.toml b/compiler/rustc_thread_pool/Cargo.toml new file mode 100644 index 0000000000000..d0bd065c45787 --- /dev/null +++ b/compiler/rustc_thread_pool/Cargo.toml @@ -0,0 +1,50 @@ +[package] +name = "rustc_thread_pool" +version = "0.0.0" +authors = ["Niko Matsakis ", + "Josh Stone "] +description = "Core APIs for Rayon - fork for rustc" +license = "MIT OR Apache-2.0" +rust-version = "1.63" +edition = "2021" +readme = "README.md" +keywords = ["parallel", "thread", "concurrency", "join", "performance"] +categories = ["concurrency"] + +[dependencies] +crossbeam-deque = "0.8" +crossbeam-utils = "0.8" + +[dev-dependencies] +rand = "0.9" +rand_xorshift = "0.4" +scoped-tls = "1.0" + +[target.'cfg(unix)'.dev-dependencies] +libc = "0.2" + +[[test]] +name = "stack_overflow_crash" +path = "tests/stack_overflow_crash.rs" + +# NB: having one [[test]] manually defined means we need to declare them all + +[[test]] +name = "double_init_fail" +path = "tests/double_init_fail.rs" + +[[test]] +name = "init_zero_threads" +path = "tests/init_zero_threads.rs" + +[[test]] +name = "scope_join" +path = "tests/scope_join.rs" + +[[test]] +name = "simple_panic" +path = "tests/simple_panic.rs" + +[[test]] +name = "scoped_threadpool" +path = "tests/scoped_threadpool.rs" diff --git a/compiler/rustc_thread_pool/README.md b/compiler/rustc_thread_pool/README.md new file mode 100644 index 0000000000000..a50cc1165b810 --- /dev/null +++ b/compiler/rustc_thread_pool/README.md @@ -0,0 +1,10 @@ +Note: This is an unstable fork made for use in rustc + +Rayon-core represents the "core, stable" APIs of Rayon: join, scope, and so forth, as well as the ability to create custom thread-pools with ThreadPool. + +Maybe worth mentioning: users are not necessarily intended to directly access rustc_thread_pool; all its APIs are mirrored in the rayon crate. To that end, the examples in the docs use rayon::join and so forth rather than rayon_core::join. + + +Please see [Rayon Docs] for details about using Rayon. 
+ +[Rayon Docs]: https://docs.rs/rayon/ diff --git a/compiler/rustc_thread_pool/src/broadcast/mod.rs b/compiler/rustc_thread_pool/src/broadcast/mod.rs new file mode 100644 index 0000000000000..9545c4b15d8fc --- /dev/null +++ b/compiler/rustc_thread_pool/src/broadcast/mod.rs @@ -0,0 +1,148 @@ +use std::fmt; +use std::marker::PhantomData; +use std::sync::Arc; + +use crate::job::{ArcJob, StackJob}; +use crate::latch::{CountLatch, LatchRef}; +use crate::registry::{Registry, WorkerThread}; + +mod tests; + +/// Executes `op` within every thread in the current threadpool. If this is +/// called from a non-Rayon thread, it will execute in the global threadpool. +/// Any attempts to use `join`, `scope`, or parallel iterators will then operate +/// within that threadpool. When the call has completed on each thread, returns +/// a vector containing all of their return values. +/// +/// For more information, see the [`ThreadPool::broadcast()`][m] method. +/// +/// [m]: struct.ThreadPool.html#method.broadcast +pub fn broadcast(op: OP) -> Vec +where + OP: Fn(BroadcastContext<'_>) -> R + Sync, + R: Send, +{ + // We assert that current registry has not terminated. + unsafe { broadcast_in(op, &Registry::current()) } +} + +/// Spawns an asynchronous task on every thread in this thread-pool. This task +/// will run in the implicit, global scope, which means that it may outlast the +/// current stack frame -- therefore, it cannot capture any references onto the +/// stack (you will likely need a `move` closure). +/// +/// For more information, see the [`ThreadPool::spawn_broadcast()`][m] method. +/// +/// [m]: struct.ThreadPool.html#method.spawn_broadcast +pub fn spawn_broadcast(op: OP) +where + OP: Fn(BroadcastContext<'_>) + Send + Sync + 'static, +{ + // We assert that current registry has not terminated. + unsafe { spawn_broadcast_in(op, &Registry::current()) } +} + +/// Provides context to a closure called by `broadcast`. +pub struct BroadcastContext<'a> { + worker: &'a WorkerThread, + + /// Make sure to prevent auto-traits like `Send` and `Sync`. + _marker: PhantomData<&'a mut dyn Fn()>, +} + +impl<'a> BroadcastContext<'a> { + pub(super) fn with(f: impl FnOnce(BroadcastContext<'_>) -> R) -> R { + let worker_thread = WorkerThread::current(); + assert!(!worker_thread.is_null()); + f(BroadcastContext { worker: unsafe { &*worker_thread }, _marker: PhantomData }) + } + + /// Our index amongst the broadcast threads (ranges from `0..self.num_threads()`). + #[inline] + pub fn index(&self) -> usize { + self.worker.index() + } + + /// The number of threads receiving the broadcast in the thread pool. + /// + /// # Future compatibility note + /// + /// Future versions of Rayon might vary the number of threads over time, but + /// this method will always return the number of threads which are actually + /// receiving your particular `broadcast` call. + #[inline] + pub fn num_threads(&self) -> usize { + self.worker.registry().num_threads() + } +} + +impl<'a> fmt::Debug for BroadcastContext<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("BroadcastContext") + .field("index", &self.index()) + .field("num_threads", &self.num_threads()) + .field("pool_id", &self.worker.registry().id()) + .finish() + } +} + +/// Execute `op` on every thread in the pool. It will be executed on each +/// thread when they have nothing else to do locally, before they try to +/// steal work from other threads. This function will not return until all +/// threads have completed the `op`. 
+/// +/// Unsafe because `registry` must not yet have terminated. +pub(super) unsafe fn broadcast_in(op: OP, registry: &Arc) -> Vec +where + OP: Fn(BroadcastContext<'_>) -> R + Sync, + R: Send, +{ + let f = move |injected: bool| { + debug_assert!(injected); + BroadcastContext::with(&op) + }; + + let n_threads = registry.num_threads(); + let current_thread = unsafe { WorkerThread::current().as_ref() }; + let tlv = crate::tlv::get(); + let latch = CountLatch::with_count(n_threads, current_thread); + let jobs: Vec<_> = + (0..n_threads).map(|_| StackJob::new(tlv, &f, LatchRef::new(&latch))).collect(); + let job_refs = jobs.iter().map(|job| unsafe { job.as_job_ref() }); + + registry.inject_broadcast(job_refs); + + // Wait for all jobs to complete, then collect the results, maybe propagating a panic. + latch.wait(current_thread); + jobs.into_iter().map(|job| unsafe { job.into_result() }).collect() +} + +/// Execute `op` on every thread in the pool. It will be executed on each +/// thread when they have nothing else to do locally, before they try to +/// steal work from other threads. This function returns immediately after +/// injecting the jobs. +/// +/// Unsafe because `registry` must not yet have terminated. +pub(super) unsafe fn spawn_broadcast_in(op: OP, registry: &Arc) +where + OP: Fn(BroadcastContext<'_>) + Send + Sync + 'static, +{ + let job = ArcJob::new({ + let registry = Arc::clone(registry); + move || { + registry.catch_unwind(|| BroadcastContext::with(&op)); + registry.terminate(); // (*) permit registry to terminate now + } + }); + + let n_threads = registry.num_threads(); + let job_refs = (0..n_threads).map(|_| { + // Ensure that registry cannot terminate until this job has executed + // on each thread. This ref is decremented at the (*) above. 
+ registry.increment_terminate_count(); + + ArcJob::as_static_job_ref(&job) + }); + + registry.inject_broadcast(job_refs); +} diff --git a/compiler/rustc_thread_pool/src/broadcast/tests.rs b/compiler/rustc_thread_pool/src/broadcast/tests.rs new file mode 100644 index 0000000000000..fac8b8ad46660 --- /dev/null +++ b/compiler/rustc_thread_pool/src/broadcast/tests.rs @@ -0,0 +1,264 @@ +#![cfg(test)] + +use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::mpsc::channel; +use std::{thread, time}; + +use crate::ThreadPoolBuilder; + +#[test] +fn broadcast_global() { + let v = crate::broadcast(|ctx| ctx.index()); + assert!(v.into_iter().eq(0..crate::current_num_threads())); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn spawn_broadcast_global() { + let (tx, rx) = channel(); + crate::spawn_broadcast(move |ctx| tx.send(ctx.index()).unwrap()); + + let mut v: Vec<_> = rx.into_iter().collect(); + v.sort_unstable(); + assert!(v.into_iter().eq(0..crate::current_num_threads())); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn broadcast_pool() { + let pool = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + let v = pool.broadcast(|ctx| ctx.index()); + assert!(v.into_iter().eq(0..7)); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn spawn_broadcast_pool() { + let (tx, rx) = channel(); + let pool = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + pool.spawn_broadcast(move |ctx| tx.send(ctx.index()).unwrap()); + + let mut v: Vec<_> = rx.into_iter().collect(); + v.sort_unstable(); + assert!(v.into_iter().eq(0..7)); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn broadcast_self() { + let pool = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + let v = pool.install(|| crate::broadcast(|ctx| ctx.index())); + assert!(v.into_iter().eq(0..7)); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn spawn_broadcast_self() { + let (tx, rx) = channel(); + let pool = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + pool.spawn(|| crate::spawn_broadcast(move |ctx| tx.send(ctx.index()).unwrap())); + + let mut v: Vec<_> = rx.into_iter().collect(); + v.sort_unstable(); + assert!(v.into_iter().eq(0..7)); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn broadcast_mutual() { + let count = AtomicUsize::new(0); + let pool1 = ThreadPoolBuilder::new().num_threads(3).build().unwrap(); + let pool2 = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + pool1.install(|| { + pool2.broadcast(|_| { + pool1.broadcast(|_| { + count.fetch_add(1, Ordering::Relaxed); + }) + }) + }); + assert_eq!(count.into_inner(), 3 * 7); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn spawn_broadcast_mutual() { + let (tx, rx) = channel(); + let pool1 = Arc::new(ThreadPoolBuilder::new().num_threads(3).build().unwrap()); + let pool2 = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + pool1.spawn({ + let pool1 = Arc::clone(&pool1); + move || { + pool2.spawn_broadcast(move |_| { + let tx = tx.clone(); + pool1.spawn_broadcast(move |_| tx.send(()).unwrap()) + }) + } + }); + assert_eq!(rx.into_iter().count(), 3 * 7); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn broadcast_mutual_sleepy() { + let count = 
AtomicUsize::new(0); + let pool1 = ThreadPoolBuilder::new().num_threads(3).build().unwrap(); + let pool2 = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + pool1.install(|| { + thread::sleep(time::Duration::from_secs(1)); + pool2.broadcast(|_| { + thread::sleep(time::Duration::from_secs(1)); + pool1.broadcast(|_| { + thread::sleep(time::Duration::from_millis(100)); + count.fetch_add(1, Ordering::Relaxed); + }) + }) + }); + assert_eq!(count.into_inner(), 3 * 7); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn spawn_broadcast_mutual_sleepy() { + let (tx, rx) = channel(); + let pool1 = Arc::new(ThreadPoolBuilder::new().num_threads(3).build().unwrap()); + let pool2 = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + pool1.spawn({ + let pool1 = Arc::clone(&pool1); + move || { + thread::sleep(time::Duration::from_secs(1)); + pool2.spawn_broadcast(move |_| { + let tx = tx.clone(); + thread::sleep(time::Duration::from_secs(1)); + pool1.spawn_broadcast(move |_| { + thread::sleep(time::Duration::from_millis(100)); + tx.send(()).unwrap(); + }) + }) + } + }); + assert_eq!(rx.into_iter().count(), 3 * 7); +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn broadcast_panic_one() { + let count = AtomicUsize::new(0); + let pool = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + let result = crate::unwind::halt_unwinding(|| { + pool.broadcast(|ctx| { + count.fetch_add(1, Ordering::Relaxed); + if ctx.index() == 3 { + panic!("Hello, world!"); + } + }) + }); + assert_eq!(count.into_inner(), 7); + assert!(result.is_err(), "broadcast panic should propagate!"); +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn spawn_broadcast_panic_one() { + let (tx, rx) = channel(); + let (panic_tx, panic_rx) = channel(); + let pool = ThreadPoolBuilder::new() + .num_threads(7) + .panic_handler(move |e| panic_tx.send(e).unwrap()) + .build() + .unwrap(); + pool.spawn_broadcast(move |ctx| { + tx.send(()).unwrap(); + if ctx.index() == 3 { + panic!("Hello, world!"); + } + }); + drop(pool); // including panic_tx + assert_eq!(rx.into_iter().count(), 7); + assert_eq!(panic_rx.into_iter().count(), 1); +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn broadcast_panic_many() { + let count = AtomicUsize::new(0); + let pool = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + let result = crate::unwind::halt_unwinding(|| { + pool.broadcast(|ctx| { + count.fetch_add(1, Ordering::Relaxed); + if ctx.index() % 2 == 0 { + panic!("Hello, world!"); + } + }) + }); + assert_eq!(count.into_inner(), 7); + assert!(result.is_err(), "broadcast panic should propagate!"); +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn spawn_broadcast_panic_many() { + let (tx, rx) = channel(); + let (panic_tx, panic_rx) = channel(); + let pool = ThreadPoolBuilder::new() + .num_threads(7) + .panic_handler(move |e| panic_tx.send(e).unwrap()) + .build() + .unwrap(); + pool.spawn_broadcast(move |ctx| { + tx.send(()).unwrap(); + if ctx.index() % 2 == 0 { + panic!("Hello, world!"); + } + }); + drop(pool); // including panic_tx + assert_eq!(rx.into_iter().count(), 7); + assert_eq!(panic_rx.into_iter().count(), 4); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn broadcast_sleep_race() { + let test_duration = time::Duration::from_secs(1); + let pool = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + let start = time::Instant::now(); + while start.elapsed() < test_duration { + 
pool.broadcast(|ctx| { + // A slight spread of sleep duration increases the chance that one + // of the threads will race in the pool's idle sleep afterward. + thread::sleep(time::Duration::from_micros(ctx.index() as u64)); + }); + } +} + +#[test] +fn broadcast_after_spawn_broadcast() { + let (tx, rx) = channel(); + + // Queue a non-blocking spawn_broadcast. + crate::spawn_broadcast(move |ctx| tx.send(ctx.index()).unwrap()); + + // This blocking broadcast runs after all prior broadcasts. + crate::broadcast(|_| {}); + + // The spawn_broadcast **must** have run by now on all threads. + let mut v: Vec<_> = rx.try_iter().collect(); + v.sort_unstable(); + assert!(v.into_iter().eq(0..crate::current_num_threads())); +} + +#[test] +fn broadcast_after_spawn() { + let (tx, rx) = channel(); + + // Queue a regular spawn on a thread-local deque. + crate::registry::in_worker(move |_, _| { + crate::spawn(move || tx.send(22).unwrap()); + }); + + // Broadcast runs after the local deque is empty. + crate::broadcast(|_| {}); + + // The spawn **must** have run by now. + assert_eq!(22, rx.try_recv().unwrap()); +} diff --git a/compiler/rustc_thread_pool/src/compile_fail/mod.rs b/compiler/rustc_thread_pool/src/compile_fail/mod.rs new file mode 100644 index 0000000000000..f2ec646a4d3d2 --- /dev/null +++ b/compiler/rustc_thread_pool/src/compile_fail/mod.rs @@ -0,0 +1,7 @@ +// These modules contain `compile_fail` doc tests. +mod quicksort_race1; +mod quicksort_race2; +mod quicksort_race3; +mod rc_return; +mod rc_upvar; +mod scope_join_bad; diff --git a/compiler/rustc_thread_pool/src/compile_fail/quicksort_race1.rs b/compiler/rustc_thread_pool/src/compile_fail/quicksort_race1.rs new file mode 100644 index 0000000000000..f6dbc76969938 --- /dev/null +++ b/compiler/rustc_thread_pool/src/compile_fail/quicksort_race1.rs @@ -0,0 +1,28 @@ +/*! ```compile_fail,E0524 + +fn quick_sort(v: &mut [T]) { + if v.len() <= 1 { + return; + } + + let mid = partition(v); + let (lo, _hi) = v.split_at_mut(mid); + rustc_thread_pool::join(|| quick_sort(lo), || quick_sort(lo)); //~ ERROR +} + +fn partition(v: &mut [T]) -> usize { + let pivot = v.len() - 1; + let mut i = 0; + for j in 0..pivot { + if v[j] <= v[pivot] { + v.swap(i, j); + i += 1; + } + } + v.swap(i, pivot); + i +} + +fn main() { } + +``` */ diff --git a/compiler/rustc_thread_pool/src/compile_fail/quicksort_race2.rs b/compiler/rustc_thread_pool/src/compile_fail/quicksort_race2.rs new file mode 100644 index 0000000000000..ccd737a700d8f --- /dev/null +++ b/compiler/rustc_thread_pool/src/compile_fail/quicksort_race2.rs @@ -0,0 +1,28 @@ +/*! ```compile_fail,E0500 + +fn quick_sort(v: &mut [T]) { + if v.len() <= 1 { + return; + } + + let mid = partition(v); + let (lo, _hi) = v.split_at_mut(mid); + rustc_thread_pool::join(|| quick_sort(lo), || quick_sort(v)); //~ ERROR +} + +fn partition(v: &mut [T]) -> usize { + let pivot = v.len() - 1; + let mut i = 0; + for j in 0..pivot { + if v[j] <= v[pivot] { + v.swap(i, j); + i += 1; + } + } + v.swap(i, pivot); + i +} + +fn main() { } + +``` */ diff --git a/compiler/rustc_thread_pool/src/compile_fail/quicksort_race3.rs b/compiler/rustc_thread_pool/src/compile_fail/quicksort_race3.rs new file mode 100644 index 0000000000000..6acdf08443371 --- /dev/null +++ b/compiler/rustc_thread_pool/src/compile_fail/quicksort_race3.rs @@ -0,0 +1,28 @@ +/*! 
```compile_fail,E0524 + +fn quick_sort(v: &mut [T]) { + if v.len() <= 1 { + return; + } + + let mid = partition(v); + let (_lo, hi) = v.split_at_mut(mid); + rustc_thread_pool::join(|| quick_sort(hi), || quick_sort(hi)); //~ ERROR +} + +fn partition(v: &mut [T]) -> usize { + let pivot = v.len() - 1; + let mut i = 0; + for j in 0..pivot { + if v[j] <= v[pivot] { + v.swap(i, j); + i += 1; + } + } + v.swap(i, pivot); + i +} + +fn main() { } + +``` */ diff --git a/compiler/rustc_thread_pool/src/compile_fail/rc_return.rs b/compiler/rustc_thread_pool/src/compile_fail/rc_return.rs new file mode 100644 index 0000000000000..165c685aba101 --- /dev/null +++ b/compiler/rustc_thread_pool/src/compile_fail/rc_return.rs @@ -0,0 +1,17 @@ +/** ```compile_fail,E0277 + +use std::rc::Rc; + +rustc_thread_pool::join(|| Rc::new(22), || ()); //~ ERROR + +``` */ +mod left {} + +/** ```compile_fail,E0277 + +use std::rc::Rc; + +rustc_thread_pool::join(|| (), || Rc::new(23)); //~ ERROR + +``` */ +mod right {} diff --git a/compiler/rustc_thread_pool/src/compile_fail/rc_upvar.rs b/compiler/rustc_thread_pool/src/compile_fail/rc_upvar.rs new file mode 100644 index 0000000000000..6dc9ead48a09e --- /dev/null +++ b/compiler/rustc_thread_pool/src/compile_fail/rc_upvar.rs @@ -0,0 +1,9 @@ +/*! ```compile_fail,E0277 + +use std::rc::Rc; + +let r = Rc::new(22); +rustc_thread_pool::join(|| r.clone(), || r.clone()); +//~^ ERROR + +``` */ diff --git a/compiler/rustc_thread_pool/src/compile_fail/scope_join_bad.rs b/compiler/rustc_thread_pool/src/compile_fail/scope_join_bad.rs new file mode 100644 index 0000000000000..e65abfc3c1e88 --- /dev/null +++ b/compiler/rustc_thread_pool/src/compile_fail/scope_join_bad.rs @@ -0,0 +1,24 @@ +/*! ```compile_fail,E0373 + +fn bad_scope(f: F) + where F: FnOnce(&i32) + Send, +{ + rustc_thread_pool::scope(|s| { + let x = 22; + s.spawn(|_| f(&x)); //~ ERROR `x` does not live long enough + }); +} + +fn good_scope(f: F) + where F: FnOnce(&i32) + Send, +{ + let x = 22; + rustc_thread_pool::scope(|s| { + s.spawn(|_| f(&x)); + }); +} + +fn main() { +} + +``` */ diff --git a/compiler/rustc_thread_pool/src/job.rs b/compiler/rustc_thread_pool/src/job.rs new file mode 100644 index 0000000000000..e6e84ac2320bf --- /dev/null +++ b/compiler/rustc_thread_pool/src/job.rs @@ -0,0 +1,277 @@ +use std::any::Any; +use std::cell::UnsafeCell; +use std::mem; +use std::sync::Arc; + +use crossbeam_deque::{Injector, Steal}; + +use crate::latch::Latch; +use crate::tlv::Tlv; +use crate::{tlv, unwind}; + +pub(super) enum JobResult { + None, + Ok(T), + Panic(Box), +} + +/// A `Job` is used to advertise work for other threads that they may +/// want to steal. In accordance with time honored tradition, jobs are +/// arranged in a deque, so that thieves can take from the top of the +/// deque while the main worker manages the bottom of the deque. This +/// deque is managed by the `thread_pool` module. +pub(super) trait Job { + /// Unsafe: this may be called from a different thread than the one + /// which scheduled the job, so the implementer must ensure the + /// appropriate traits are met, whether `Send`, `Sync`, or both. + unsafe fn execute(this: *const ()); +} + +/// Effectively a Job trait object. Each JobRef **must** be executed +/// exactly once, or else data may leak. +/// +/// Internally, we store the job's data in a `*const ()` pointer. The +/// true type is something like `*const StackJob<...>`, but we hide +/// it. We also carry the "execute fn" from the `Job` trait. 
+pub(super) struct JobRef { + pointer: *const (), + execute_fn: unsafe fn(*const ()), +} + +unsafe impl Send for JobRef {} +unsafe impl Sync for JobRef {} + +impl JobRef { + /// Unsafe: caller asserts that `data` will remain valid until the + /// job is executed. + pub(super) unsafe fn new(data: *const T) -> JobRef + where + T: Job, + { + // erase types: + JobRef { pointer: data as *const (), execute_fn: ::execute } + } + + /// Returns an opaque handle that can be saved and compared, + /// without making `JobRef` itself `Copy + Eq`. + #[inline] + pub(super) fn id(&self) -> impl Eq { + (self.pointer, self.execute_fn) + } + + #[inline] + pub(super) unsafe fn execute(self) { + unsafe { (self.execute_fn)(self.pointer) } + } +} + +/// A job that will be owned by a stack slot. This means that when it +/// executes it need not free any heap data, the cleanup occurs when +/// the stack frame is later popped. The function parameter indicates +/// `true` if the job was stolen -- executed on a different thread. +pub(super) struct StackJob +where + L: Latch + Sync, + F: FnOnce(bool) -> R + Send, + R: Send, +{ + pub(super) latch: L, + func: UnsafeCell>, + result: UnsafeCell>, + tlv: Tlv, +} + +impl StackJob +where + L: Latch + Sync, + F: FnOnce(bool) -> R + Send, + R: Send, +{ + pub(super) fn new(tlv: Tlv, func: F, latch: L) -> StackJob { + StackJob { + latch, + func: UnsafeCell::new(Some(func)), + result: UnsafeCell::new(JobResult::None), + tlv, + } + } + + pub(super) unsafe fn as_job_ref(&self) -> JobRef { + unsafe { JobRef::new(self) } + } + + pub(super) unsafe fn run_inline(self, stolen: bool) -> R { + self.func.into_inner().unwrap()(stolen) + } + + pub(super) unsafe fn into_result(self) -> R { + self.result.into_inner().into_return_value() + } +} + +impl Job for StackJob +where + L: Latch + Sync, + F: FnOnce(bool) -> R + Send, + R: Send, +{ + unsafe fn execute(this: *const ()) { + let this = unsafe { &*(this as *const Self) }; + tlv::set(this.tlv); + let abort = unwind::AbortIfPanic; + let func = unsafe { (*this.func.get()).take().unwrap() }; + unsafe { + (*this.result.get()) = JobResult::call(func); + } + unsafe { + Latch::set(&this.latch); + } + mem::forget(abort); + } +} + +/// Represents a job stored in the heap. Used to implement +/// `scope`. Unlike `StackJob`, when executed, `HeapJob` simply +/// invokes a closure, which then triggers the appropriate logic to +/// signal that the job executed. +/// +/// (Probably `StackJob` should be refactored in a similar fashion.) +pub(super) struct HeapJob +where + BODY: FnOnce() + Send, +{ + job: BODY, + tlv: Tlv, +} + +impl HeapJob +where + BODY: FnOnce() + Send, +{ + pub(super) fn new(tlv: Tlv, job: BODY) -> Box { + Box::new(HeapJob { job, tlv }) + } + + /// Creates a `JobRef` from this job -- note that this hides all + /// lifetimes, so it is up to you to ensure that this JobRef + /// doesn't outlive any data that it closes over. + pub(super) unsafe fn into_job_ref(self: Box) -> JobRef { + unsafe { JobRef::new(Box::into_raw(self)) } + } + + /// Creates a static `JobRef` from this job. + pub(super) fn into_static_job_ref(self: Box) -> JobRef + where + BODY: 'static, + { + unsafe { self.into_job_ref() } + } +} + +impl Job for HeapJob +where + BODY: FnOnce() + Send, +{ + unsafe fn execute(this: *const ()) { + let this = unsafe { Box::from_raw(this as *mut Self) }; + tlv::set(this.tlv); + (this.job)(); + } +} + +/// Represents a job stored in an `Arc` -- like `HeapJob`, but may +/// be turned into multiple `JobRef`s and called multiple times. 
+pub(super) struct ArcJob +where + BODY: Fn() + Send + Sync, +{ + job: BODY, +} + +impl ArcJob +where + BODY: Fn() + Send + Sync, +{ + pub(super) fn new(job: BODY) -> Arc { + Arc::new(ArcJob { job }) + } + + /// Creates a `JobRef` from this job -- note that this hides all + /// lifetimes, so it is up to you to ensure that this JobRef + /// doesn't outlive any data that it closes over. + pub(super) unsafe fn as_job_ref(this: &Arc) -> JobRef { + unsafe { JobRef::new(Arc::into_raw(Arc::clone(this))) } + } + + /// Creates a static `JobRef` from this job. + pub(super) fn as_static_job_ref(this: &Arc) -> JobRef + where + BODY: 'static, + { + unsafe { Self::as_job_ref(this) } + } +} + +impl Job for ArcJob +where + BODY: Fn() + Send + Sync, +{ + unsafe fn execute(this: *const ()) { + let this = unsafe { Arc::from_raw(this as *mut Self) }; + (this.job)(); + } +} + +impl JobResult { + fn call(func: impl FnOnce(bool) -> T) -> Self { + match unwind::halt_unwinding(|| func(true)) { + Ok(x) => JobResult::Ok(x), + Err(x) => JobResult::Panic(x), + } + } + + /// Convert the `JobResult` for a job that has finished (and hence + /// its JobResult is populated) into its return value. + /// + /// NB. This will panic if the job panicked. + pub(super) fn into_return_value(self) -> T { + match self { + JobResult::None => unreachable!(), + JobResult::Ok(x) => x, + JobResult::Panic(x) => unwind::resume_unwinding(x), + } + } +} + +/// Indirect queue to provide FIFO job priority. +pub(super) struct JobFifo { + inner: Injector, +} + +impl JobFifo { + pub(super) fn new() -> Self { + JobFifo { inner: Injector::new() } + } + + pub(super) unsafe fn push(&self, job_ref: JobRef) -> JobRef { + // A little indirection ensures that spawns are always prioritized in FIFO order. The + // jobs in a thread's deque may be popped from the back (LIFO) or stolen from the front + // (FIFO), but either way they will end up popping from the front of this queue. + self.inner.push(job_ref); + unsafe { JobRef::new(self) } + } +} + +impl Job for JobFifo { + unsafe fn execute(this: *const ()) { + // We "execute" a queue by executing its first job, FIFO. + let this = unsafe { &*(this as *const Self) }; + loop { + match this.inner.steal() { + Steal::Success(job_ref) => break unsafe { job_ref.execute() }, + Steal::Empty => panic!("FIFO is empty"), + Steal::Retry => {} + } + } + } +} diff --git a/compiler/rustc_thread_pool/src/join/mod.rs b/compiler/rustc_thread_pool/src/join/mod.rs new file mode 100644 index 0000000000000..f285362c19b1b --- /dev/null +++ b/compiler/rustc_thread_pool/src/join/mod.rs @@ -0,0 +1,201 @@ +use std::any::Any; + +use crate::job::StackJob; +use crate::latch::SpinLatch; +use crate::registry::{self, WorkerThread}; +use crate::tlv::{self, Tlv}; +use crate::{FnContext, unwind}; + +#[cfg(test)] +mod tests; + +/// Takes two closures and *potentially* runs them in parallel. It +/// returns a pair of the results from those closures. +/// +/// Conceptually, calling `join()` is similar to spawning two threads, +/// one executing each of the two closures. However, the +/// implementation is quite different and incurs very low +/// overhead. The underlying technique is called "work stealing": the +/// Rayon runtime uses a fixed pool of worker threads and attempts to +/// only execute code in parallel when there are idle CPUs to handle +/// it. +/// +/// When `join` is called from outside the thread pool, the calling +/// thread will block while the closures execute in the pool. 
When +/// `join` is called within the pool, the calling thread still actively +/// participates in the thread pool. It will begin by executing closure +/// A (on the current thread). While it is doing that, it will advertise +/// closure B as being available for other threads to execute. Once closure A +/// has completed, the current thread will try to execute closure B; +/// if however closure B has been stolen, then it will look for other work +/// while waiting for the thief to fully execute closure B. (This is the +/// typical work-stealing strategy). +/// +/// # Examples +/// +/// This example uses join to perform a quick-sort (note this is not a +/// particularly optimized implementation: if you **actually** want to +/// sort for real, you should prefer [the `par_sort` method] offered +/// by Rayon). +/// +/// [the `par_sort` method]: ../rayon/slice/trait.ParallelSliceMut.html#method.par_sort +/// +/// ```rust +/// # use rustc_thread_pool as rayon; +/// let mut v = vec![5, 1, 8, 22, 0, 44]; +/// quick_sort(&mut v); +/// assert_eq!(v, vec![0, 1, 5, 8, 22, 44]); +/// +/// fn quick_sort(v: &mut [T]) { +/// if v.len() > 1 { +/// let mid = partition(v); +/// let (lo, hi) = v.split_at_mut(mid); +/// rayon::join(|| quick_sort(lo), +/// || quick_sort(hi)); +/// } +/// } +/// +/// // Partition rearranges all items `<=` to the pivot +/// // item (arbitrary selected to be the last item in the slice) +/// // to the first half of the slice. It then returns the +/// // "dividing point" where the pivot is placed. +/// fn partition(v: &mut [T]) -> usize { +/// let pivot = v.len() - 1; +/// let mut i = 0; +/// for j in 0..pivot { +/// if v[j] <= v[pivot] { +/// v.swap(i, j); +/// i += 1; +/// } +/// } +/// v.swap(i, pivot); +/// i +/// } +/// ``` +/// +/// # Warning about blocking I/O +/// +/// The assumption is that the closures given to `join()` are +/// CPU-bound tasks that do not perform I/O or other blocking +/// operations. If you do perform I/O, and that I/O should block +/// (e.g., waiting for a network request), the overall performance may +/// be poor. Moreover, if you cause one closure to be blocked waiting +/// on another (for example, using a channel), that could lead to a +/// deadlock. +/// +/// # Panics +/// +/// No matter what happens, both closures will always be executed. If +/// a single closure panics, whether it be the first or second +/// closure, that panic will be propagated and hence `join()` will +/// panic with the same panic value. If both closures panic, `join()` +/// will panic with the panic value from the first closure. +pub fn join(oper_a: A, oper_b: B) -> (RA, RB) +where + A: FnOnce() -> RA + Send, + B: FnOnce() -> RB + Send, + RA: Send, + RB: Send, +{ + #[inline] + fn call(f: impl FnOnce() -> R) -> impl FnOnce(FnContext) -> R { + move |_| f() + } + + join_context(call(oper_a), call(oper_b)) +} + +/// Identical to `join`, except that the closures have a parameter +/// that provides context for the way the closure has been called, +/// especially indicating whether they're executing on a different +/// thread than where `join_context` was called. This will occur if +/// the second job is stolen by a different thread, or if +/// `join_context` was called from outside the thread pool to begin +/// with. 
+pub fn join_context(oper_a: A, oper_b: B) -> (RA, RB) +where + A: FnOnce(FnContext) -> RA + Send, + B: FnOnce(FnContext) -> RB + Send, + RA: Send, + RB: Send, +{ + #[inline] + fn call_a(f: impl FnOnce(FnContext) -> R, injected: bool) -> impl FnOnce() -> R { + move || f(FnContext::new(injected)) + } + + #[inline] + fn call_b(f: impl FnOnce(FnContext) -> R) -> impl FnOnce(bool) -> R { + move |migrated| f(FnContext::new(migrated)) + } + + registry::in_worker(|worker_thread, injected| unsafe { + let tlv = tlv::get(); + // Create virtual wrapper for task b; this all has to be + // done here so that the stack frame can keep it all live + // long enough. + let job_b = StackJob::new(tlv, call_b(oper_b), SpinLatch::new(worker_thread)); + let job_b_ref = job_b.as_job_ref(); + let job_b_id = job_b_ref.id(); + worker_thread.push(job_b_ref); + + // Execute task a; hopefully b gets stolen in the meantime. + let status_a = unwind::halt_unwinding(call_a(oper_a, injected)); + let result_a = match status_a { + Ok(v) => v, + Err(err) => join_recover_from_panic(worker_thread, &job_b.latch, err, tlv), + }; + + // Now that task A has finished, try to pop job B from the + // local stack. It may already have been popped by job A; it + // may also have been stolen. There may also be some tasks + // pushed on top of it in the stack, and we will have to pop + // those off to get to it. + while !job_b.latch.probe() { + if let Some(job) = worker_thread.take_local_job() { + if job_b_id == job.id() { + // Found it! Let's run it. + // + // Note that this could panic, but it's ok if we unwind here. + + // Restore the TLV since we might have run some jobs overwriting it when waiting for job b. + tlv::set(tlv); + + let result_b = job_b.run_inline(injected); + return (result_a, result_b); + } else { + worker_thread.execute(job); + } + } else { + // Local deque is empty. Time to steal from other + // threads. + worker_thread.wait_until(&job_b.latch); + debug_assert!(job_b.latch.probe()); + break; + } + } + + // Restore the TLV since we might have run some jobs overwriting it when waiting for job b. + tlv::set(tlv); + + (result_a, job_b.into_result()) + }) +} + +/// If job A panics, we still cannot return until we are sure that job +/// B is complete. This is because it may contain references into the +/// enclosing stack frame(s). +#[cold] // cold path +unsafe fn join_recover_from_panic( + worker_thread: &WorkerThread, + job_b_latch: &SpinLatch<'_>, + err: Box, + tlv: Tlv, +) -> ! { + unsafe { worker_thread.wait_until(job_b_latch) }; + + // Restore the TLV since we might have run some jobs overwriting it when waiting for job b. + tlv::set(tlv); + + unwind::resume_unwinding(err) +} diff --git a/compiler/rustc_thread_pool/src/join/tests.rs b/compiler/rustc_thread_pool/src/join/tests.rs new file mode 100644 index 0000000000000..9df99072c3a10 --- /dev/null +++ b/compiler/rustc_thread_pool/src/join/tests.rs @@ -0,0 +1,151 @@ +//! Tests for the join code. 
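As a complement to the quick-sort doc example, a second hedged sketch of the same divide-and-conquer pattern built only on `join` (the 1024-element cutoff is an arbitrary illustration, not a tuned value):

```rust
// Recursive parallel sum on top of `join`. The second closure is advertised
// for stealing while the current thread works on the first, as described in
// the `join` documentation above.
use rustc_thread_pool as rayon;

fn parallel_sum(v: &[u64]) -> u64 {
    if v.len() <= 1024 {
        return v.iter().sum();
    }
    let (lo, hi) = v.split_at(v.len() / 2);
    let (a, b) = rayon::join(|| parallel_sum(lo), || parallel_sum(hi));
    a + b
}

fn main() {
    let data: Vec<u64> = (0..100_000).collect();
    assert_eq!(parallel_sum(&data), data.iter().sum::<u64>());
}
```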
+ +use rand::distr::StandardUniform; +use rand::{Rng, SeedableRng}; +use rand_xorshift::XorShiftRng; + +use super::*; +use crate::ThreadPoolBuilder; + +fn quick_sort(v: &mut [T]) { + if v.len() <= 1 { + return; + } + + let mid = partition(v); + let (lo, hi) = v.split_at_mut(mid); + join(|| quick_sort(lo), || quick_sort(hi)); +} + +fn partition(v: &mut [T]) -> usize { + let pivot = v.len() - 1; + let mut i = 0; + for j in 0..pivot { + if v[j] <= v[pivot] { + v.swap(i, j); + i += 1; + } + } + v.swap(i, pivot); + i +} + +fn seeded_rng() -> XorShiftRng { + let mut seed = ::Seed::default(); + (0..).zip(seed.as_mut()).for_each(|(i, x)| *x = i); + XorShiftRng::from_seed(seed) +} + +#[test] +fn sort() { + let rng = seeded_rng(); + let mut data: Vec = rng.sample_iter(&StandardUniform).take(6 * 1024).collect(); + let mut sorted_data = data.clone(); + sorted_data.sort(); + quick_sort(&mut data); + assert_eq!(data, sorted_data); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn sort_in_pool() { + let rng = seeded_rng(); + let mut data: Vec = rng.sample_iter(&StandardUniform).take(12 * 1024).collect(); + + let pool = ThreadPoolBuilder::new().build().unwrap(); + let mut sorted_data = data.clone(); + sorted_data.sort(); + pool.install(|| quick_sort(&mut data)); + assert_eq!(data, sorted_data); +} + +#[test] +#[should_panic(expected = "Hello, world!")] +fn panic_propagate_a() { + join(|| panic!("Hello, world!"), || ()); +} + +#[test] +#[should_panic(expected = "Hello, world!")] +fn panic_propagate_b() { + join(|| (), || panic!("Hello, world!")); +} + +#[test] +#[should_panic(expected = "Hello, world!")] +fn panic_propagate_both() { + join(|| panic!("Hello, world!"), || panic!("Goodbye, world!")); +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn panic_b_still_executes() { + let mut x = false; + match unwind::halt_unwinding(|| join(|| panic!("Hello, world!"), || x = true)) { + Ok(_) => panic!("failed to propagate panic from closure A,"), + Err(_) => assert!(x, "closure b failed to execute"), + } +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn join_context_both() { + // If we're not in a pool, both should be marked stolen as they're injected. + let (a_migrated, b_migrated) = join_context(|a| a.migrated(), |b| b.migrated()); + assert!(a_migrated); + assert!(b_migrated); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn join_context_neither() { + // If we're already in a 1-thread pool, neither job should be stolen. + let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap(); + let (a_migrated, b_migrated) = + pool.install(|| join_context(|a| a.migrated(), |b| b.migrated())); + assert!(!a_migrated); + assert!(!b_migrated); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn join_context_second() { + use std::sync::Barrier; + + // If we're already in a 2-thread pool, the second job should be stolen. 
+ let barrier = Barrier::new(2); + let pool = ThreadPoolBuilder::new().num_threads(2).build().unwrap(); + let (a_migrated, b_migrated) = pool.install(|| { + join_context( + |a| { + barrier.wait(); + a.migrated() + }, + |b| { + barrier.wait(); + b.migrated() + }, + ) + }); + assert!(!a_migrated); + assert!(b_migrated); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn join_counter_overflow() { + const MAX: u32 = 500_000; + + let mut i = 0; + let mut j = 0; + let pool = ThreadPoolBuilder::new().num_threads(2).build().unwrap(); + + // Hammer on join a bunch of times -- used to hit overflow debug-assertions + // in JEC on 32-bit targets: https://github.com/rayon-rs/rayon/issues/797 + for _ in 0..MAX { + pool.join(|| i += 1, || j += 1); + } + + assert_eq!(i, MAX); + assert_eq!(j, MAX); +} diff --git a/compiler/rustc_thread_pool/src/latch.rs b/compiler/rustc_thread_pool/src/latch.rs new file mode 100644 index 0000000000000..49ba62d3bea3d --- /dev/null +++ b/compiler/rustc_thread_pool/src/latch.rs @@ -0,0 +1,431 @@ +use std::marker::PhantomData; +use std::ops::Deref; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Condvar, Mutex}; + +use crate::registry::{Registry, WorkerThread}; + +/// We define various kinds of latches, which are all a primitive signaling +/// mechanism. A latch starts as false. Eventually someone calls `set()` and +/// it becomes true. You can test if it has been set by calling `probe()`. +/// +/// Some kinds of latches, but not all, support a `wait()` operation +/// that will wait until the latch is set, blocking efficiently. That +/// is not part of the trait since it is not possibly to do with all +/// latches. +/// +/// The intention is that `set()` is called once, but `probe()` may be +/// called any number of times. Once `probe()` returns true, the memory +/// effects that occurred before `set()` become visible. +/// +/// It'd probably be better to refactor the API into two paired types, +/// but that's a bit of work, and this is not a public API. +/// +/// ## Memory ordering +/// +/// Latches need to guarantee two things: +/// +/// - Once `probe()` returns true, all memory effects from the `set()` +/// are visible (in other words, the set should synchronize-with +/// the probe). +/// - Once `set()` occurs, the next `probe()` *will* observe it. This +/// typically requires a seq-cst ordering. See [the "tickle-then-get-sleepy" scenario in the sleep +/// README](/src/sleep/README.md#tickle-then-get-sleepy) for details. +pub(super) trait Latch { + /// Set the latch, signalling others. + /// + /// # WARNING + /// + /// Setting a latch triggers other threads to wake up and (in some + /// cases) complete. This may, in turn, cause memory to be + /// deallocated and so forth. One must be very careful about this, + /// and it's typically better to read all the fields you will need + /// to access *before* a latch is set! + /// + /// This function operates on `*const Self` instead of `&self` to allow it + /// to become dangling during this call. The caller must ensure that the + /// pointer is valid upon entry, and not invalidated during the call by any + /// actions other than `set` itself. + unsafe fn set(this: *const Self); +} + +pub(super) trait AsCoreLatch { + fn as_core_latch(&self) -> &CoreLatch; +} + +/// Latch is not set, owning thread is awake +const UNSET: usize = 0; + +/// Latch is not set, owning thread is going to sleep on this latch +/// (but has not yet fallen asleep). 
+const SLEEPY: usize = 1; + +/// Latch is not set, owning thread is asleep on this latch and +/// must be awoken. +const SLEEPING: usize = 2; + +/// Latch is set. +const SET: usize = 3; + +/// Spin latches are the simplest, most efficient kind, but they do +/// not support a `wait()` operation. They just have a boolean flag +/// that becomes true when `set()` is called. +#[derive(Debug)] +pub(super) struct CoreLatch { + state: AtomicUsize, +} + +impl CoreLatch { + #[inline] + fn new() -> Self { + Self { state: AtomicUsize::new(0) } + } + + /// Invoked by owning thread as it prepares to sleep. Returns true + /// if the owning thread may proceed to fall asleep, false if the + /// latch was set in the meantime. + #[inline] + pub(super) fn get_sleepy(&self) -> bool { + self.state.compare_exchange(UNSET, SLEEPY, Ordering::SeqCst, Ordering::Relaxed).is_ok() + } + + /// Invoked by owning thread as it falls asleep sleep. Returns + /// true if the owning thread should block, or false if the latch + /// was set in the meantime. + #[inline] + pub(super) fn fall_asleep(&self) -> bool { + self.state.compare_exchange(SLEEPY, SLEEPING, Ordering::SeqCst, Ordering::Relaxed).is_ok() + } + + /// Invoked by owning thread as it falls asleep sleep. Returns + /// true if the owning thread should block, or false if the latch + /// was set in the meantime. + #[inline] + pub(super) fn wake_up(&self) { + if !self.probe() { + let _ = + self.state.compare_exchange(SLEEPING, UNSET, Ordering::SeqCst, Ordering::Relaxed); + } + } + + /// Set the latch. If this returns true, the owning thread was sleeping + /// and must be awoken. + /// + /// This is private because, typically, setting a latch involves + /// doing some wakeups; those are encapsulated in the surrounding + /// latch code. + #[inline] + unsafe fn set(this: *const Self) -> bool { + let old_state = unsafe { (*this).state.swap(SET, Ordering::AcqRel) }; + old_state == SLEEPING + } + + /// Test if this latch has been set. + #[inline] + pub(super) fn probe(&self) -> bool { + self.state.load(Ordering::Acquire) == SET + } +} + +impl AsCoreLatch for CoreLatch { + #[inline] + fn as_core_latch(&self) -> &CoreLatch { + self + } +} + +/// Spin latches are the simplest, most efficient kind, but they do +/// not support a `wait()` operation. They just have a boolean flag +/// that becomes true when `set()` is called. +pub(super) struct SpinLatch<'r> { + core_latch: CoreLatch, + registry: &'r Arc, + target_worker_index: usize, + cross: bool, +} + +impl<'r> SpinLatch<'r> { + /// Creates a new spin latch that is owned by `thread`. This means + /// that `thread` is the only thread that should be blocking on + /// this latch -- it also means that when the latch is set, we + /// will wake `thread` if it is sleeping. + #[inline] + pub(super) fn new(thread: &'r WorkerThread) -> SpinLatch<'r> { + SpinLatch { + core_latch: CoreLatch::new(), + registry: thread.registry(), + target_worker_index: thread.index(), + cross: false, + } + } + + /// Creates a new spin latch for cross-threadpool blocking. Notably, we + /// need to make sure the registry is kept alive after setting, so we can + /// safely call the notification. 
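As an aside on the latch contract spelled out above (set once, probe many, with set-to-probe visibility), here is a minimal self-contained sketch using a plain `AtomicBool`. It is not this crate's `CoreLatch`: the real type additionally tracks the sleepy/sleeping states with stronger (SeqCst) orderings so the sleep protocol cannot miss a wakeup.

```rust
// Toy latch illustrating only the visibility half of the contract: a
// Release store in `set` synchronizes with an Acquire load in `probe`,
// so writes made before `set` are visible once `probe` returns true.
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::thread;

struct ToyLatch(AtomicBool);

impl ToyLatch {
    fn new() -> Self {
        ToyLatch(AtomicBool::new(false))
    }
    fn set(&self) {
        self.0.store(true, Ordering::Release);
    }
    fn probe(&self) -> bool {
        self.0.load(Ordering::Acquire)
    }
}

fn main() {
    let latch = Arc::new(ToyLatch::new());
    let signaller = Arc::clone(&latch);
    let handle = thread::spawn(move || signaller.set());
    while !latch.probe() {
        std::hint::spin_loop();
    }
    handle.join().unwrap();
}
```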
+ #[inline] + pub(super) fn cross(thread: &'r WorkerThread) -> SpinLatch<'r> { + SpinLatch { cross: true, ..SpinLatch::new(thread) } + } + + #[inline] + pub(super) fn probe(&self) -> bool { + self.core_latch.probe() + } +} + +impl<'r> AsCoreLatch for SpinLatch<'r> { + #[inline] + fn as_core_latch(&self) -> &CoreLatch { + &self.core_latch + } +} + +impl<'r> Latch for SpinLatch<'r> { + #[inline] + unsafe fn set(this: *const Self) { + let cross_registry; + + let registry: &Registry = if unsafe { (*this).cross } { + // Ensure the registry stays alive while we notify it. + // Otherwise, it would be possible that we set the spin + // latch and the other thread sees it and exits, causing + // the registry to be deallocated, all before we get a + // chance to invoke `registry.notify_worker_latch_is_set`. + cross_registry = Arc::clone(unsafe { (*this).registry }); + &cross_registry + } else { + // If this is not a "cross-registry" spin-latch, then the + // thread which is performing `set` is itself ensuring + // that the registry stays alive. However, that doesn't + // include this *particular* `Arc` handle if the waiting + // thread then exits, so we must completely dereference it. + unsafe { (*this).registry } + }; + let target_worker_index = unsafe { (*this).target_worker_index }; + + // NOTE: Once we `set`, the target may proceed and invalidate `this`! + if unsafe { CoreLatch::set(&(*this).core_latch) } { + // Subtle: at this point, we can no longer read from + // `self`, because the thread owning this spin latch may + // have awoken and deallocated the latch. Therefore, we + // only use fields whose values we already read. + registry.notify_worker_latch_is_set(target_worker_index); + } + } +} + +/// A Latch starts as false and eventually becomes true. You can block +/// until it becomes true. +#[derive(Debug)] +pub(super) struct LockLatch { + m: Mutex, + v: Condvar, +} + +impl LockLatch { + #[inline] + pub(super) fn new() -> LockLatch { + LockLatch { m: Mutex::new(false), v: Condvar::new() } + } + + /// Block until latch is set, then resets this lock latch so it can be reused again. + pub(super) fn wait_and_reset(&self) { + let mut guard = self.m.lock().unwrap(); + while !*guard { + guard = self.v.wait(guard).unwrap(); + } + *guard = false; + } + + /// Block until latch is set. + pub(super) fn wait(&self) { + let mut guard = self.m.lock().unwrap(); + while !*guard { + guard = self.v.wait(guard).unwrap(); + } + } +} + +impl Latch for LockLatch { + #[inline] + unsafe fn set(this: *const Self) { + let mut guard = unsafe { (*this).m.lock().unwrap() }; + *guard = true; + unsafe { (*this).v.notify_all() }; + } +} + +/// Once latches are used to implement one-time blocking, primarily +/// for the termination flag of the threads in the pool. +/// +/// Note: like a `SpinLatch`, once-latches are always associated with +/// some registry that is probing them, which must be tickled when +/// they are set. *Unlike* a `SpinLatch`, they don't themselves hold a +/// reference to that registry. This is because in some cases the +/// registry owns the once-latch, and that would create a cycle. So a +/// `OnceLatch` must be given a reference to its owning registry when +/// it is set. For this reason, it does not implement the `Latch` +/// trait (but it doesn't have to, as it is not used in those generic +/// contexts). 
+#[derive(Debug)] +pub(super) struct OnceLatch { + core_latch: CoreLatch, +} + +impl OnceLatch { + #[inline] + pub(super) fn new() -> OnceLatch { + Self { core_latch: CoreLatch::new() } + } + + /// Set the latch, then tickle the specific worker thread, + /// which should be the one that owns this latch. + #[inline] + pub(super) unsafe fn set_and_tickle_one( + this: *const Self, + registry: &Registry, + target_worker_index: usize, + ) { + if unsafe { CoreLatch::set(&(*this).core_latch) } { + registry.notify_worker_latch_is_set(target_worker_index); + } + } +} + +impl AsCoreLatch for OnceLatch { + #[inline] + fn as_core_latch(&self) -> &CoreLatch { + &self.core_latch + } +} + +/// Counting latches are used to implement scopes. They track a +/// counter. Unlike other latches, calling `set()` does not +/// necessarily make the latch be considered `set()`; instead, it just +/// decrements the counter. The latch is only "set" (in the sense that +/// `probe()` returns true) once the counter reaches zero. +#[derive(Debug)] +pub(super) struct CountLatch { + counter: AtomicUsize, + kind: CountLatchKind, +} + +enum CountLatchKind { + /// A latch for scopes created on a rayon thread which will participate in work- + /// stealing while it waits for completion. This thread is not necessarily part + /// of the same registry as the scope itself! + Stealing { + latch: CoreLatch, + /// If a worker thread in registry A calls `in_place_scope` on a ThreadPool + /// with registry B, when a job completes in a thread of registry B, we may + /// need to call `notify_worker_latch_is_set()` to wake the thread in registry A. + /// That means we need a reference to registry A (since at that point we will + /// only have a reference to registry B), so we stash it here. + registry: Arc, + /// The index of the worker to wake in `registry` + worker_index: usize, + }, + + /// A latch for scopes created on a non-rayon thread which will block to wait. + Blocking { latch: LockLatch }, +} + +impl std::fmt::Debug for CountLatchKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + CountLatchKind::Stealing { latch, .. } => { + f.debug_tuple("Stealing").field(latch).finish() + } + CountLatchKind::Blocking { latch, .. 
} => { + f.debug_tuple("Blocking").field(latch).finish() + } + } + } +} + +impl CountLatch { + pub(super) fn new(owner: Option<&WorkerThread>) -> Self { + Self::with_count(1, owner) + } + + pub(super) fn with_count(count: usize, owner: Option<&WorkerThread>) -> Self { + Self { + counter: AtomicUsize::new(count), + kind: match owner { + Some(owner) => CountLatchKind::Stealing { + latch: CoreLatch::new(), + registry: Arc::clone(owner.registry()), + worker_index: owner.index(), + }, + None => CountLatchKind::Blocking { latch: LockLatch::new() }, + }, + } + } + + #[inline] + pub(super) fn increment(&self) { + let old_counter = self.counter.fetch_add(1, Ordering::Relaxed); + debug_assert!(old_counter != 0); + } + + pub(super) fn wait(&self, owner: Option<&WorkerThread>) { + match &self.kind { + CountLatchKind::Stealing { latch, registry, worker_index } => unsafe { + let owner = owner.expect("owner thread"); + debug_assert_eq!(registry.id(), owner.registry().id()); + debug_assert_eq!(*worker_index, owner.index()); + owner.wait_until(latch); + }, + CountLatchKind::Blocking { latch } => latch.wait(), + } + } +} + +impl Latch for CountLatch { + #[inline] + unsafe fn set(this: *const Self) { + if unsafe { (*this).counter.fetch_sub(1, Ordering::SeqCst) == 1 } { + // NOTE: Once we call `set` on the internal `latch`, + // the target may proceed and invalidate `this`! + match unsafe { &(*this).kind } { + CountLatchKind::Stealing { latch, registry, worker_index } => { + let registry = Arc::clone(registry); + if unsafe { CoreLatch::set(latch) } { + registry.notify_worker_latch_is_set(*worker_index); + } + } + CountLatchKind::Blocking { latch } => unsafe { LockLatch::set(latch) }, + } + } + } +} + +/// `&L` without any implication of `dereferenceable` for `Latch::set` +pub(super) struct LatchRef<'a, L> { + inner: *const L, + marker: PhantomData<&'a L>, +} + +impl LatchRef<'_, L> { + pub(super) fn new(inner: &L) -> LatchRef<'_, L> { + LatchRef { inner, marker: PhantomData } + } +} + +unsafe impl Sync for LatchRef<'_, L> {} + +impl Deref for LatchRef<'_, L> { + type Target = L; + + fn deref(&self) -> &L { + // SAFETY: if we have &self, the inner latch is still alive + unsafe { &*self.inner } + } +} + +impl Latch for LatchRef<'_, L> { + #[inline] + unsafe fn set(this: *const Self) { + unsafe { L::set((*this).inner) }; + } +} diff --git a/compiler/rustc_thread_pool/src/lib.rs b/compiler/rustc_thread_pool/src/lib.rs new file mode 100644 index 0000000000000..34252d919e38e --- /dev/null +++ b/compiler/rustc_thread_pool/src/lib.rs @@ -0,0 +1,903 @@ +//! Rayon-core houses the core stable APIs of Rayon. +//! +//! These APIs have been mirrored in the Rayon crate and it is recommended to use these from there. +//! +//! [`join`] is used to take two closures and potentially run them in parallel. +//! - It will run in parallel if task B gets stolen before task A can finish. +//! - It will run sequentially if task A finishes before task B is stolen and can continue on task B. +//! +//! [`scope`] creates a scope in which you can run any number of parallel tasks. +//! These tasks can spawn nested tasks and scopes, but given the nature of work stealing, the order of execution can not be guaranteed. +//! The scope will exist until all tasks spawned within the scope have been completed. +//! +//! [`spawn`] add a task into the 'static' or 'global' scope, or a local scope created by the [`scope()`] function. +//! +//! 
[`ThreadPool`] can be used to create your own thread pools (using [`ThreadPoolBuilder`]) or to customize the global one. +//! Tasks spawned within the pool (using [`install()`], [`join()`], etc.) will be added to a deque, +//! where it becomes available for work stealing from other threads in the local threadpool. +//! +//! [`join`]: fn.join.html +//! [`scope`]: fn.scope.html +//! [`scope()`]: fn.scope.html +//! [`spawn`]: fn.spawn.html +//! [`ThreadPool`]: struct.threadpool.html +//! [`install()`]: struct.ThreadPool.html#method.install +//! [`spawn()`]: struct.ThreadPool.html#method.spawn +//! [`join()`]: struct.ThreadPool.html#method.join +//! [`ThreadPoolBuilder`]: struct.ThreadPoolBuilder.html +//! +//! # Global fallback when threading is unsupported +//! +//! Rayon uses `std` APIs for threading, but some targets have incomplete implementations that +//! always return `Unsupported` errors. The WebAssembly `wasm32-unknown-unknown` and `wasm32-wasi` +//! targets are notable examples of this. Rather than panicking on the unsupported error when +//! creating the implicit global threadpool, Rayon configures a fallback mode instead. +//! +//! This fallback mode mostly functions as if it were using a single-threaded "pool", like setting +//! `RAYON_NUM_THREADS=1`. For example, `join` will execute its two closures sequentially, since +//! there is no other thread to share the work. However, since the pool is not running independent +//! of the main thread, non-blocking calls like `spawn` may not execute at all, unless a lower- +//! priority call like `broadcast` gives them an opening. The fallback mode does not try to emulate +//! anything like thread preemption or `async` task switching, but `yield_now` or `yield_local` +//! can also volunteer execution time. +//! +//! Explicit `ThreadPoolBuilder` methods always report their error without any fallback. +//! +//! # Restricting multiple versions +//! +//! In order to ensure proper coordination between threadpools, and especially +//! to make sure there's only one global threadpool, `rayon-core` is actively +//! restricted from building multiple versions of itself into a single target. +//! You may see a build error like this in violation: +//! +//! ```text +//! error: native library `rayon-core` is being linked to by more +//! than one package, and can only be linked to by one package +//! ``` +//! +//! While we strive to keep `rayon-core` semver-compatible, it's still +//! possible to arrive at this situation if different crates have overly +//! restrictive tilde or inequality requirements for `rayon-core`. The +//! conflicting requirements will need to be resolved before the build will +//! succeed. 
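To tie the pieces summarized above together, a short end-to-end sketch (assuming the crate is used under its `rustc_thread_pool` name, as in the doc examples):

```rust
// Build a pool, run a closure on it with `install`, and split work with `join`.
use rustc_thread_pool as rayon;

fn main() -> Result<(), rayon::ThreadPoolBuildError> {
    let pool = rayon::ThreadPoolBuilder::new().num_threads(4).build()?;

    // Work submitted through `install` runs on the pool's worker threads,
    // where `join` can hand one closure off to an idle worker.
    let (evens, odds) = pool.install(|| {
        let data: Vec<u32> = (0..1_000).collect();
        rayon::join(
            || data.iter().filter(|&&n| n % 2 == 0).count(),
            || data.iter().filter(|&&n| n % 2 == 1).count(),
        )
    });
    assert_eq!(evens + odds, 1_000);
    Ok(())
}
```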
+ +#![cfg_attr(test, allow(unused_crate_dependencies))] +#![warn(rust_2018_idioms)] + +use std::any::Any; +use std::error::Error; +use std::marker::PhantomData; +use std::str::FromStr; +use std::{env, fmt, io, thread}; + +#[macro_use] +mod private; + +mod broadcast; +mod job; +mod join; +mod latch; +mod registry; +mod scope; +mod sleep; +mod spawn; +mod thread_pool; +mod unwind; +mod worker_local; + +mod compile_fail; +mod tests; + +pub mod tlv; + +pub use worker_local::WorkerLocal; + +pub use self::broadcast::{BroadcastContext, broadcast, spawn_broadcast}; +pub use self::join::{join, join_context}; +use self::registry::{CustomSpawn, DefaultSpawn, ThreadSpawn}; +pub use self::registry::{Registry, ThreadBuilder, mark_blocked, mark_unblocked}; +pub use self::scope::{Scope, ScopeFifo, in_place_scope, in_place_scope_fifo, scope, scope_fifo}; +pub use self::spawn::{spawn, spawn_fifo}; +pub use self::thread_pool::{ + ThreadPool, Yield, current_thread_has_pending_tasks, current_thread_index, yield_local, + yield_now, +}; + +/// Returns the maximum number of threads that Rayon supports in a single thread-pool. +/// +/// If a higher thread count is requested by calling `ThreadPoolBuilder::num_threads` or by setting +/// the `RAYON_NUM_THREADS` environment variable, then it will be reduced to this maximum. +/// +/// The value may vary between different targets, and is subject to change in new Rayon versions. +pub fn max_num_threads() -> usize { + // We are limited by the bits available in the sleep counter's `AtomicUsize`. + crate::sleep::THREADS_MAX +} + +/// Returns the number of threads in the current registry. If this +/// code is executing within a Rayon thread-pool, then this will be +/// the number of threads for the thread-pool of the current +/// thread. Otherwise, it will be the number of threads for the global +/// thread-pool. +/// +/// This can be useful when trying to judge how many times to split +/// parallel work (the parallel iterator traits use this value +/// internally for this purpose). +/// +/// # Future compatibility note +/// +/// Note that unless this thread-pool was created with a +/// builder that specifies the number of threads, then this +/// number may vary over time in future versions (see [the +/// `num_threads()` method for details][snt]). +/// +/// [snt]: struct.ThreadPoolBuilder.html#method.num_threads +pub fn current_num_threads() -> usize { + crate::registry::Registry::current_num_threads() +} + +/// Error when initializing a thread pool. +#[derive(Debug)] +pub struct ThreadPoolBuildError { + kind: ErrorKind, +} + +#[derive(Debug)] +enum ErrorKind { + GlobalPoolAlreadyInitialized, + IOError(io::Error), +} + +/// Used to create a new [`ThreadPool`] or to configure the global rayon thread pool. +/// ## Creating a ThreadPool +/// The following creates a thread pool with 22 threads. +/// +/// ```rust +/// # use rustc_thread_pool as rayon; +/// let pool = rayon::ThreadPoolBuilder::new().num_threads(22).build().unwrap(); +/// ``` +/// +/// To instead configure the global thread pool, use [`build_global()`]: +/// +/// ```rust +/// # use rustc_thread_pool as rayon; +/// rayon::ThreadPoolBuilder::new().num_threads(22).build_global().unwrap(); +/// ``` +/// +/// [`ThreadPool`]: struct.ThreadPool.html +/// [`build_global()`]: struct.ThreadPoolBuilder.html#method.build_global +pub struct ThreadPoolBuilder { + /// The number of threads in the rayon thread pool. + /// If zero will use the RAYON_NUM_THREADS environment variable. 
+ /// If RAYON_NUM_THREADS is invalid or zero will use the default. + num_threads: usize, + + /// Custom closure, if any, to handle a panic that we cannot propagate + /// anywhere else. + panic_handler: Option>, + + /// Closure to compute the name of a thread. + get_thread_name: Option String>>, + + /// The stack size for the created worker threads + stack_size: Option, + + /// Closure invoked on deadlock. + deadlock_handler: Option>, + + /// Closure invoked on worker thread start. + start_handler: Option>, + + /// Closure invoked on worker thread exit. + exit_handler: Option>, + + /// Closure invoked to spawn threads. + spawn_handler: S, + + /// Closure invoked when starting computations in a thread. + acquire_thread_handler: Option>, + + /// Closure invoked when blocking in a thread. + release_thread_handler: Option>, + + /// If false, worker threads will execute spawned jobs in a + /// "depth-first" fashion. If true, they will do a "breadth-first" + /// fashion. Depth-first is the default. + breadth_first: bool, +} + +/// Contains the rayon thread pool configuration. Use [`ThreadPoolBuilder`] instead. +/// +/// [`ThreadPoolBuilder`]: struct.ThreadPoolBuilder.html +#[deprecated(note = "Use `ThreadPoolBuilder`")] +#[derive(Default)] +pub struct Configuration { + builder: ThreadPoolBuilder, +} + +/// The type for a panic handling closure. Note that this same closure +/// may be invoked multiple times in parallel. +type PanicHandler = dyn Fn(Box) + Send + Sync; + +/// The type for a closure that gets invoked when the Rayon thread pool deadlocks +type DeadlockHandler = dyn Fn() + Send + Sync; + +/// The type for a closure that gets invoked when a thread starts. The +/// closure is passed the index of the thread on which it is invoked. +/// Note that this same closure may be invoked multiple times in parallel. +type StartHandler = dyn Fn(usize) + Send + Sync; + +/// The type for a closure that gets invoked when a thread exits. The +/// closure is passed the index of the thread on which it is invoked. +/// Note that this same closure may be invoked multiple times in parallel. +type ExitHandler = dyn Fn(usize) + Send + Sync; + +// NB: We can't `#[derive(Default)]` because `S` is left ambiguous. +impl Default for ThreadPoolBuilder { + fn default() -> Self { + ThreadPoolBuilder { + num_threads: 0, + panic_handler: None, + get_thread_name: None, + stack_size: None, + start_handler: None, + exit_handler: None, + deadlock_handler: None, + acquire_thread_handler: None, + release_thread_handler: None, + spawn_handler: DefaultSpawn, + breadth_first: false, + } + } +} + +/// The type for a closure that gets invoked before starting computations in a thread. +/// Note that this same closure may be invoked multiple times in parallel. +type AcquireThreadHandler = dyn Fn() + Send + Sync; + +/// The type for a closure that gets invoked before blocking in a thread. +/// Note that this same closure may be invoked multiple times in parallel. +type ReleaseThreadHandler = dyn Fn() + Send + Sync; + +impl ThreadPoolBuilder { + /// Creates and returns a valid rayon thread pool builder, but does not initialize it. + pub fn new() -> Self { + Self::default() + } +} + +/// Note: the `S: ThreadSpawn` constraint is an internal implementation detail for the +/// default spawn and those set by [`spawn_handler`](#method.spawn_handler). +impl ThreadPoolBuilder +where + S: ThreadSpawn, +{ + /// Creates a new `ThreadPool` initialized using this configuration. 
+ pub fn build(self) -> Result { + ThreadPool::build(self) + } + + /// Initializes the global thread pool. This initialization is + /// **optional**. If you do not call this function, the thread pool + /// will be automatically initialized with the default + /// configuration. Calling `build_global` is not recommended, except + /// in two scenarios: + /// + /// - You wish to change the default configuration. + /// - You are running a benchmark, in which case initializing may + /// yield slightly more consistent results, since the worker threads + /// will already be ready to go even in the first iteration. But + /// this cost is minimal. + /// + /// Initialization of the global thread pool happens exactly + /// once. Once started, the configuration cannot be + /// changed. Therefore, if you call `build_global` a second time, it + /// will return an error. An `Ok` result indicates that this + /// is the first initialization of the thread pool. + pub fn build_global(self) -> Result<(), ThreadPoolBuildError> { + let registry = registry::init_global_registry(self)?; + registry.wait_until_primed(); + Ok(()) + } +} + +impl ThreadPoolBuilder { + /// Creates a scoped `ThreadPool` initialized using this configuration. + /// + /// This is a convenience function for building a pool using [`std::thread::scope`] + /// to spawn threads in a [`spawn_handler`](#method.spawn_handler). + /// The threads in this pool will start by calling `wrapper`, which should + /// do initialization and continue by calling `ThreadBuilder::run()`. + /// + /// [`std::thread::scope`]: https://doc.rust-lang.org/std/thread/fn.scope.html + /// + /// # Examples + /// + /// A scoped pool may be useful in combination with scoped thread-local variables. + /// + /// ``` + /// # use rustc_thread_pool as rayon; + /// + /// scoped_tls::scoped_thread_local!(static POOL_DATA: Vec); + /// + /// fn main() -> Result<(), rayon::ThreadPoolBuildError> { + /// let pool_data = vec![1, 2, 3]; + /// + /// // We haven't assigned any TLS data yet. + /// assert!(!POOL_DATA.is_set()); + /// + /// rayon::ThreadPoolBuilder::new() + /// .build_scoped( + /// // Borrow `pool_data` in TLS for each thread. + /// |thread| POOL_DATA.set(&pool_data, || thread.run()), + /// // Do some work that needs the TLS data. + /// |pool| pool.install(|| assert!(POOL_DATA.is_set())), + /// )?; + /// + /// // Once we've returned, `pool_data` is no longer borrowed. + /// drop(pool_data); + /// Ok(()) + /// } + /// ``` + pub fn build_scoped(self, wrapper: W, with_pool: F) -> Result + where + W: Fn(ThreadBuilder) + Sync, // expected to call `run()` + F: FnOnce(&ThreadPool) -> R, + { + std::thread::scope(|scope| { + let pool = self + .spawn_handler(|thread| { + let mut builder = std::thread::Builder::new(); + if let Some(name) = thread.name() { + builder = builder.name(name.to_string()); + } + if let Some(size) = thread.stack_size() { + builder = builder.stack_size(size); + } + builder.spawn_scoped(scope, || wrapper(thread))?; + Ok(()) + }) + .build()?; + let result = unwind::halt_unwinding(|| with_pool(&pool)); + pool.wait_until_stopped(); + match result { + Ok(result) => Ok(result), + Err(err) => unwind::resume_unwinding(err), + } + }) + } +} + +impl ThreadPoolBuilder { + /// Sets a custom function for spawning threads. + /// + /// Note that the threads will not exit until after the pool is dropped. It + /// is up to the caller to wait for thread termination if that is important + /// for any invariants. 
For instance, threads created in [`std::thread::scope`] + /// will be joined before that scope returns, and this will block indefinitely + /// if the pool is leaked. Furthermore, the global thread pool doesn't terminate + /// until the entire process exits! + /// + /// # Examples + /// + /// A minimal spawn handler just needs to call `run()` from an independent thread. + /// + /// ``` + /// # use rustc_thread_pool as rayon; + /// fn main() -> Result<(), rayon::ThreadPoolBuildError> { + /// let pool = rayon::ThreadPoolBuilder::new() + /// .spawn_handler(|thread| { + /// std::thread::spawn(|| thread.run()); + /// Ok(()) + /// }) + /// .build()?; + /// + /// pool.install(|| println!("Hello from my custom thread!")); + /// Ok(()) + /// } + /// ``` + /// + /// The default spawn handler sets the name and stack size if given, and propagates + /// any errors from the thread builder. + /// + /// ``` + /// # use rustc_thread_pool as rayon; + /// fn main() -> Result<(), rayon::ThreadPoolBuildError> { + /// let pool = rayon::ThreadPoolBuilder::new() + /// .spawn_handler(|thread| { + /// let mut b = std::thread::Builder::new(); + /// if let Some(name) = thread.name() { + /// b = b.name(name.to_owned()); + /// } + /// if let Some(stack_size) = thread.stack_size() { + /// b = b.stack_size(stack_size); + /// } + /// b.spawn(|| thread.run())?; + /// Ok(()) + /// }) + /// .build()?; + /// + /// pool.install(|| println!("Hello from my fully custom thread!")); + /// Ok(()) + /// } + /// ``` + /// + /// This can also be used for a pool of scoped threads like [`crossbeam::scope`], + /// or [`std::thread::scope`] introduced in Rust 1.63, which is encapsulated in + /// [`build_scoped`](#method.build_scoped). + /// + /// [`crossbeam::scope`]: https://docs.rs/crossbeam/0.8/crossbeam/fn.scope.html + /// [`std::thread::scope`]: https://doc.rust-lang.org/std/thread/fn.scope.html + /// + /// ``` + /// # use rustc_thread_pool as rayon; + /// fn main() -> Result<(), rayon::ThreadPoolBuildError> { + /// std::thread::scope(|scope| { + /// let pool = rayon::ThreadPoolBuilder::new() + /// .spawn_handler(|thread| { + /// let mut builder = std::thread::Builder::new(); + /// if let Some(name) = thread.name() { + /// builder = builder.name(name.to_string()); + /// } + /// if let Some(size) = thread.stack_size() { + /// builder = builder.stack_size(size); + /// } + /// builder.spawn_scoped(scope, || { + /// // Add any scoped initialization here, then run! + /// thread.run() + /// })?; + /// Ok(()) + /// }) + /// .build()?; + /// + /// pool.install(|| println!("Hello from my custom scoped thread!")); + /// Ok(()) + /// }) + /// } + /// ``` + pub fn spawn_handler(self, spawn: F) -> ThreadPoolBuilder> + where + F: FnMut(ThreadBuilder) -> io::Result<()>, + { + ThreadPoolBuilder { + spawn_handler: CustomSpawn::new(spawn), + // ..self + num_threads: self.num_threads, + panic_handler: self.panic_handler, + get_thread_name: self.get_thread_name, + stack_size: self.stack_size, + start_handler: self.start_handler, + exit_handler: self.exit_handler, + deadlock_handler: self.deadlock_handler, + acquire_thread_handler: self.acquire_thread_handler, + release_thread_handler: self.release_thread_handler, + breadth_first: self.breadth_first, + } + } + + /// Returns a reference to the current spawn handler. + fn get_spawn_handler(&mut self) -> &mut S { + &mut self.spawn_handler + } + + /// Get the number of threads that will be used for the thread + /// pool. See `num_threads()` for more information. 
+ fn get_num_threads(&self) -> usize { + if self.num_threads > 0 { + self.num_threads + } else { + let default = || thread::available_parallelism().map(|n| n.get()).unwrap_or(1); + + match env::var("RAYON_NUM_THREADS").ok().and_then(|s| usize::from_str(&s).ok()) { + Some(x @ 1..) => return x, + Some(0) => return default(), + _ => {} + } + + // Support for deprecated `RAYON_RS_NUM_CPUS`. + match env::var("RAYON_RS_NUM_CPUS").ok().and_then(|s| usize::from_str(&s).ok()) { + Some(x @ 1..) => x, + _ => default(), + } + } + } + + /// Get the thread name for the thread with the given index. + fn get_thread_name(&mut self, index: usize) -> Option { + let f = self.get_thread_name.as_mut()?; + Some(f(index)) + } + + /// Sets a closure which takes a thread index and returns + /// the thread's name. + pub fn thread_name(mut self, closure: F) -> Self + where + F: FnMut(usize) -> String + 'static, + { + self.get_thread_name = Some(Box::new(closure)); + self + } + + /// Sets the number of threads to be used in the rayon threadpool. + /// + /// If you specify a non-zero number of threads using this + /// function, then the resulting thread-pools are guaranteed to + /// start at most this number of threads. + /// + /// If `num_threads` is 0, or you do not call this function, then + /// the Rayon runtime will select the number of threads + /// automatically. At present, this is based on the + /// `RAYON_NUM_THREADS` environment variable (if set), + /// or the number of logical CPUs (otherwise). + /// In the future, however, the default behavior may + /// change to dynamically add or remove threads as needed. + /// + /// **Future compatibility warning:** Given the default behavior + /// may change in the future, if you wish to rely on a fixed + /// number of threads, you should use this function to specify + /// that number. To reproduce the current default behavior, you + /// may wish to use [`std::thread::available_parallelism`] + /// to query the number of CPUs dynamically. + /// + /// **Old environment variable:** `RAYON_NUM_THREADS` is a one-to-one + /// replacement of the now deprecated `RAYON_RS_NUM_CPUS` environment + /// variable. If both variables are specified, `RAYON_NUM_THREADS` will + /// be preferred. + pub fn num_threads(mut self, num_threads: usize) -> Self { + self.num_threads = num_threads; + self + } + + /// Returns a copy of the current panic handler. + fn take_panic_handler(&mut self) -> Option> { + self.panic_handler.take() + } + + /// Normally, whenever Rayon catches a panic, it tries to + /// propagate it to someplace sensible, to try and reflect the + /// semantics of sequential execution. But in some cases, + /// particularly with the `spawn()` APIs, there is no + /// obvious place where we should propagate the panic to. + /// In that case, this panic handler is invoked. + /// + /// If no panic handler is set, the default is to abort the + /// process, under the principle that panics should not go + /// unobserved. + /// + /// If the panic handler itself panics, this will abort the + /// process. To prevent this, wrap the body of your panic handler + /// in a call to `std::panic::catch_unwind()`. 
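Following the advice above, a hedged sketch of a panic handler whose body is wrapped in `catch_unwind`; the logging and the final sleep are illustrative choices for a standalone example only:

```rust
use std::panic;
use std::time::Duration;

use rustc_thread_pool as rayon;

fn main() -> Result<(), rayon::ThreadPoolBuildError> {
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(2)
        .panic_handler(|payload| {
            // Swallow any panic raised while reporting, so a buggy handler
            // cannot abort the process (as recommended above).
            let _ = panic::catch_unwind(panic::AssertUnwindSafe(|| {
                match payload.downcast_ref::<&str>() {
                    Some(msg) => eprintln!("worker panicked: {msg}"),
                    None => eprintln!("worker panicked"),
                }
            }));
        })
        .build()?;

    // `spawn` has no caller to propagate a panic to, so the handler runs.
    pool.spawn(|| panic!("boom"));

    // Crude: give the worker a moment to run before the process exits.
    std::thread::sleep(Duration::from_millis(100));
    Ok(())
}
```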
+ pub fn panic_handler(mut self, panic_handler: H) -> Self + where + H: Fn(Box) + Send + Sync + 'static, + { + self.panic_handler = Some(Box::new(panic_handler)); + self + } + + /// Get the stack size of the worker threads + fn get_stack_size(&self) -> Option { + self.stack_size + } + + /// Sets the stack size of the worker threads + pub fn stack_size(mut self, stack_size: usize) -> Self { + self.stack_size = Some(stack_size); + self + } + + /// **(DEPRECATED)** Suggest to worker threads that they execute + /// spawned jobs in a "breadth-first" fashion. + /// + /// Typically, when a worker thread is idle or blocked, it will + /// attempt to execute the job from the *top* of its local deque of + /// work (i.e., the job most recently spawned). If this flag is set + /// to true, however, workers will prefer to execute in a + /// *breadth-first* fashion -- that is, they will search for jobs at + /// the *bottom* of their local deque. (At present, workers *always* + /// steal from the bottom of other workers' deques, regardless of + /// the setting of this flag.) + /// + /// If you think of the tasks as a tree, where a parent task + /// spawns its children in the tree, then this flag loosely + /// corresponds to doing a breadth-first traversal of the tree, + /// whereas the default would be to do a depth-first traversal. + /// + /// **Note that this is an "execution hint".** Rayon's task + /// execution is highly dynamic and the precise order in which + /// independent tasks are executed is not intended to be + /// guaranteed. + /// + /// This `breadth_first()` method is now deprecated per [RFC #1], + /// and in the future its effect may be removed. Consider using + /// [`scope_fifo()`] for a similar effect. + /// + /// [RFC #1]: https://github.com/rayon-rs/rfcs/blob/master/accepted/rfc0001-scope-scheduling.md + /// [`scope_fifo()`]: fn.scope_fifo.html + #[deprecated(note = "use `scope_fifo` and `spawn_fifo` for similar effect")] + pub fn breadth_first(mut self) -> Self { + self.breadth_first = true; + self + } + + fn get_breadth_first(&self) -> bool { + self.breadth_first + } + + /// Takes the current acquire thread callback, leaving `None`. + fn take_acquire_thread_handler(&mut self) -> Option> { + self.acquire_thread_handler.take() + } + + /// Set a callback to be invoked when starting computations in a thread. + pub fn acquire_thread_handler(mut self, acquire_thread_handler: H) -> Self + where + H: Fn() + Send + Sync + 'static, + { + self.acquire_thread_handler = Some(Box::new(acquire_thread_handler)); + self + } + + /// Takes the current release thread callback, leaving `None`. + fn take_release_thread_handler(&mut self) -> Option> { + self.release_thread_handler.take() + } + + /// Set a callback to be invoked when blocking in thread. + pub fn release_thread_handler(mut self, release_thread_handler: H) -> Self + where + H: Fn() + Send + Sync + 'static, + { + self.release_thread_handler = Some(Box::new(release_thread_handler)); + self + } + + /// Takes the current deadlock callback, leaving `None`. + fn take_deadlock_handler(&mut self) -> Option> { + self.deadlock_handler.take() + } + + /// Set a callback to be invoked on current deadlock. + pub fn deadlock_handler(mut self, deadlock_handler: H) -> Self + where + H: Fn() + Send + Sync + 'static, + { + self.deadlock_handler = Some(Box::new(deadlock_handler)); + self + } + + /// Takes the current thread start callback, leaving `None`. 
+ fn take_start_handler(&mut self) -> Option> { + self.start_handler.take() + } + + /// Sets a callback to be invoked on thread start. + /// + /// The closure is passed the index of the thread on which it is invoked. + /// Note that this same closure may be invoked multiple times in parallel. + /// If this closure panics, the panic will be passed to the panic handler. + /// If that handler returns, then startup will continue normally. + pub fn start_handler(mut self, start_handler: H) -> Self + where + H: Fn(usize) + Send + Sync + 'static, + { + self.start_handler = Some(Box::new(start_handler)); + self + } + + /// Returns a current thread exit callback, leaving `None`. + fn take_exit_handler(&mut self) -> Option> { + self.exit_handler.take() + } + + /// Sets a callback to be invoked on thread exit. + /// + /// The closure is passed the index of the thread on which it is invoked. + /// Note that this same closure may be invoked multiple times in parallel. + /// If this closure panics, the panic will be passed to the panic handler. + /// If that handler returns, then the thread will exit normally. + pub fn exit_handler(mut self, exit_handler: H) -> Self + where + H: Fn(usize) + Send + Sync + 'static, + { + self.exit_handler = Some(Box::new(exit_handler)); + self + } +} + +#[allow(deprecated)] +impl Configuration { + /// Creates and return a valid rayon thread pool configuration, but does not initialize it. + pub fn new() -> Configuration { + Configuration { builder: ThreadPoolBuilder::new() } + } + + /// Deprecated in favor of `ThreadPoolBuilder::build`. + pub fn build(self) -> Result> { + self.builder.build().map_err(Box::from) + } + + /// Deprecated in favor of `ThreadPoolBuilder::thread_name`. + pub fn thread_name(mut self, closure: F) -> Self + where + F: FnMut(usize) -> String + 'static, + { + self.builder = self.builder.thread_name(closure); + self + } + + /// Deprecated in favor of `ThreadPoolBuilder::num_threads`. + pub fn num_threads(mut self, num_threads: usize) -> Configuration { + self.builder = self.builder.num_threads(num_threads); + self + } + + /// Deprecated in favor of `ThreadPoolBuilder::panic_handler`. + pub fn panic_handler(mut self, panic_handler: H) -> Configuration + where + H: Fn(Box) + Send + Sync + 'static, + { + self.builder = self.builder.panic_handler(panic_handler); + self + } + + /// Deprecated in favor of `ThreadPoolBuilder::stack_size`. + pub fn stack_size(mut self, stack_size: usize) -> Self { + self.builder = self.builder.stack_size(stack_size); + self + } + + /// Deprecated in favor of `ThreadPoolBuilder::breadth_first`. + pub fn breadth_first(mut self) -> Self { + self.builder = self.builder.breadth_first(); + self + } + + /// Deprecated in favor of `ThreadPoolBuilder::start_handler`. + pub fn start_handler(mut self, start_handler: H) -> Configuration + where + H: Fn(usize) + Send + Sync + 'static, + { + self.builder = self.builder.start_handler(start_handler); + self + } + + /// Deprecated in favor of `ThreadPoolBuilder::exit_handler`. + pub fn exit_handler(mut self, exit_handler: H) -> Configuration + where + H: Fn(usize) + Send + Sync + 'static, + { + self.builder = self.builder.exit_handler(exit_handler); + self + } + + /// Returns a ThreadPoolBuilder with identical parameters. 
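A combined sketch of the lifecycle callbacks above (thread naming, start/exit handlers, and the deadlock handler specific to this fork); the logging bodies are purely illustrative:

```rust
use rustc_thread_pool as rayon;

fn main() -> Result<(), rayon::ThreadPoolBuildError> {
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(2)
        .thread_name(|i| format!("worker-{i}"))
        .start_handler(|i| eprintln!("thread {i} started"))
        .exit_handler(|i| eprintln!("thread {i} exiting"))
        // Invoked if the pool reports a deadlock (a fork-specific extension).
        .deadlock_handler(|| eprintln!("thread pool deadlocked"))
        .build()?;

    pool.install(|| eprintln!("running on {} threads", rayon::current_num_threads()));
    Ok(())
}
```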
+ fn into_builder(self) -> ThreadPoolBuilder { + self.builder + } +} + +impl ThreadPoolBuildError { + fn new(kind: ErrorKind) -> ThreadPoolBuildError { + ThreadPoolBuildError { kind } + } + + fn is_unsupported(&self) -> bool { + matches!(&self.kind, ErrorKind::IOError(e) if e.kind() == io::ErrorKind::Unsupported) + } +} + +const GLOBAL_POOL_ALREADY_INITIALIZED: &str = + "The global thread pool has already been initialized."; + +impl Error for ThreadPoolBuildError { + #[allow(deprecated)] + fn description(&self) -> &str { + match self.kind { + ErrorKind::GlobalPoolAlreadyInitialized => GLOBAL_POOL_ALREADY_INITIALIZED, + ErrorKind::IOError(ref e) => e.description(), + } + } + + fn source(&self) -> Option<&(dyn Error + 'static)> { + match &self.kind { + ErrorKind::GlobalPoolAlreadyInitialized => None, + ErrorKind::IOError(e) => Some(e), + } + } +} + +impl fmt::Display for ThreadPoolBuildError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.kind { + ErrorKind::GlobalPoolAlreadyInitialized => GLOBAL_POOL_ALREADY_INITIALIZED.fmt(f), + ErrorKind::IOError(e) => e.fmt(f), + } + } +} + +/// Deprecated in favor of `ThreadPoolBuilder::build_global`. +#[deprecated(note = "use `ThreadPoolBuilder::build_global`")] +#[allow(deprecated)] +pub fn initialize(config: Configuration) -> Result<(), Box> { + config.into_builder().build_global().map_err(Box::from) +} + +impl fmt::Debug for ThreadPoolBuilder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let ThreadPoolBuilder { + ref num_threads, + ref get_thread_name, + ref panic_handler, + ref stack_size, + ref deadlock_handler, + ref start_handler, + ref exit_handler, + ref acquire_thread_handler, + ref release_thread_handler, + spawn_handler: _, + ref breadth_first, + } = *self; + + // Just print `Some()` or `None` to the debug + // output. + struct ClosurePlaceholder; + impl fmt::Debug for ClosurePlaceholder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("") + } + } + let get_thread_name = get_thread_name.as_ref().map(|_| ClosurePlaceholder); + let panic_handler = panic_handler.as_ref().map(|_| ClosurePlaceholder); + let deadlock_handler = deadlock_handler.as_ref().map(|_| ClosurePlaceholder); + let start_handler = start_handler.as_ref().map(|_| ClosurePlaceholder); + let exit_handler = exit_handler.as_ref().map(|_| ClosurePlaceholder); + let acquire_thread_handler = acquire_thread_handler.as_ref().map(|_| ClosurePlaceholder); + let release_thread_handler = release_thread_handler.as_ref().map(|_| ClosurePlaceholder); + + f.debug_struct("ThreadPoolBuilder") + .field("num_threads", num_threads) + .field("get_thread_name", &get_thread_name) + .field("panic_handler", &panic_handler) + .field("stack_size", &stack_size) + .field("deadlock_handler", &deadlock_handler) + .field("start_handler", &start_handler) + .field("exit_handler", &exit_handler) + .field("acquire_thread_handler", &acquire_thread_handler) + .field("release_thread_handler", &release_thread_handler) + .field("breadth_first", &breadth_first) + .finish() + } +} + +#[allow(deprecated)] +impl fmt::Debug for Configuration { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.builder.fmt(f) + } +} + +/// Provides the calling context to a closure called by `join_context`. +#[derive(Debug)] +pub struct FnContext { + migrated: bool, + + /// disable `Send` and `Sync`, just for a little future-proofing. 
+ _marker: PhantomData<*mut ()>, +} + +impl FnContext { + #[inline] + fn new(migrated: bool) -> Self { + FnContext { migrated, _marker: PhantomData } + } +} + +impl FnContext { + /// Returns `true` if the closure was called from a different thread + /// than it was provided from. + #[inline] + pub fn migrated(&self) -> bool { + self.migrated + } +} diff --git a/compiler/rustc_thread_pool/src/private.rs b/compiler/rustc_thread_pool/src/private.rs new file mode 100644 index 0000000000000..5d4f4a8c2caf5 --- /dev/null +++ b/compiler/rustc_thread_pool/src/private.rs @@ -0,0 +1,26 @@ +//! The public parts of this private module are used to create traits +//! that cannot be implemented outside of our own crate. This way we +//! can feel free to extend those traits without worrying about it +//! being a breaking change for other implementations. + +/// If this type is pub but not publicly reachable, third parties +/// can't name it and can't implement traits using it. +#[allow(missing_debug_implementations)] +pub struct PrivateMarker; + +macro_rules! private_decl { + () => { + /// This trait is private; this method exists to make it + /// impossible to implement outside the crate. + #[doc(hidden)] + fn __rayon_private__(&self) -> crate::private::PrivateMarker; + }; +} + +macro_rules! private_impl { + () => { + fn __rayon_private__(&self) -> crate::private::PrivateMarker { + crate::private::PrivateMarker + } + }; +} diff --git a/compiler/rustc_thread_pool/src/registry.rs b/compiler/rustc_thread_pool/src/registry.rs new file mode 100644 index 0000000000000..03a01aa29d2af --- /dev/null +++ b/compiler/rustc_thread_pool/src/registry.rs @@ -0,0 +1,1025 @@ +use std::cell::Cell; +use std::collections::hash_map::DefaultHasher; +use std::hash::Hasher; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex, Once}; +use std::{fmt, io, mem, ptr, thread}; + +use crossbeam_deque::{Injector, Steal, Stealer, Worker}; + +use crate::job::{JobFifo, JobRef, StackJob}; +use crate::latch::{AsCoreLatch, CoreLatch, Latch, LatchRef, LockLatch, OnceLatch, SpinLatch}; +use crate::sleep::Sleep; +use crate::tlv::Tlv; +use crate::{ + AcquireThreadHandler, DeadlockHandler, ErrorKind, ExitHandler, PanicHandler, + ReleaseThreadHandler, StartHandler, ThreadPoolBuildError, ThreadPoolBuilder, Yield, unwind, +}; + +/// Thread builder used for customization via +/// [`ThreadPoolBuilder::spawn_handler`](struct.ThreadPoolBuilder.html#method.spawn_handler). +pub struct ThreadBuilder { + name: Option, + stack_size: Option, + worker: Worker, + stealer: Stealer, + registry: Arc, + index: usize, +} + +impl ThreadBuilder { + /// Gets the index of this thread in the pool, within `0..num_threads`. + pub fn index(&self) -> usize { + self.index + } + + /// Gets the string that was specified by `ThreadPoolBuilder::name()`. + pub fn name(&self) -> Option<&str> { + self.name.as_deref() + } + + /// Gets the value that was specified by `ThreadPoolBuilder::stack_size()`. + pub fn stack_size(&self) -> Option { + self.stack_size + } + + /// Executes the main loop for this thread. This will not return until the + /// thread pool is dropped. 
+ pub fn run(self) { + unsafe { main_loop(self) } + } +} + +impl fmt::Debug for ThreadBuilder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ThreadBuilder") + .field("pool", &self.registry.id()) + .field("index", &self.index) + .field("name", &self.name) + .field("stack_size", &self.stack_size) + .finish() + } +} + +/// Generalized trait for spawning a thread in the `Registry`. +/// +/// This trait is pub-in-private -- E0445 forces us to make it public, +/// but we don't actually want to expose these details in the API. +pub trait ThreadSpawn { + private_decl! {} + + /// Spawn a thread with the `ThreadBuilder` parameters, and then + /// call `ThreadBuilder::run()`. + fn spawn(&mut self, thread: ThreadBuilder) -> io::Result<()>; +} + +/// Spawns a thread in the "normal" way with `std::thread::Builder`. +/// +/// This type is pub-in-private -- E0445 forces us to make it public, +/// but we don't actually want to expose these details in the API. +#[derive(Debug, Default)] +pub struct DefaultSpawn; + +impl ThreadSpawn for DefaultSpawn { + private_impl! {} + + fn spawn(&mut self, thread: ThreadBuilder) -> io::Result<()> { + let mut b = thread::Builder::new(); + if let Some(name) = thread.name() { + b = b.name(name.to_owned()); + } + if let Some(stack_size) = thread.stack_size() { + b = b.stack_size(stack_size); + } + b.spawn(|| thread.run())?; + Ok(()) + } +} + +/// Spawns a thread with a user's custom callback. +/// +/// This type is pub-in-private -- E0445 forces us to make it public, +/// but we don't actually want to expose these details in the API. +#[derive(Debug)] +pub struct CustomSpawn(F); + +impl CustomSpawn +where + F: FnMut(ThreadBuilder) -> io::Result<()>, +{ + pub(super) fn new(spawn: F) -> Self { + CustomSpawn(spawn) + } +} + +impl ThreadSpawn for CustomSpawn +where + F: FnMut(ThreadBuilder) -> io::Result<()>, +{ + private_impl! {} + + #[inline] + fn spawn(&mut self, thread: ThreadBuilder) -> io::Result<()> { + (self.0)(thread) + } +} + +pub struct Registry { + thread_infos: Vec, + sleep: Sleep, + injected_jobs: Injector, + broadcasts: Mutex>>, + panic_handler: Option>, + pub(crate) deadlock_handler: Option>, + start_handler: Option>, + exit_handler: Option>, + pub(crate) acquire_thread_handler: Option>, + pub(crate) release_thread_handler: Option>, + + // When this latch reaches 0, it means that all work on this + // registry must be complete. This is ensured in the following ways: + // + // - if this is the global registry, there is a ref-count that never + // gets released. + // - if this is a user-created thread-pool, then so long as the thread-pool + // exists, it holds a reference. + // - when we inject a "blocking job" into the registry with `ThreadPool::install()`, + // no adjustment is needed; the `ThreadPool` holds the reference, and since we won't + // return until the blocking job is complete, that ref will continue to be held. + // - when `join()` or `scope()` is invoked, similarly, no adjustments are needed. + // These are always owned by some other job (e.g., one injected by `ThreadPool::install()`) + // and that job will keep the pool alive. + terminate_count: AtomicUsize, +} + +/// //////////////////////////////////////////////////////////////////////// +/// Initialization + +static mut THE_REGISTRY: Option> = None; +static THE_REGISTRY_SET: Once = Once::new(); + +/// Starts the worker threads (if that has not already happened). If +/// initialization has not already occurred, use the default +/// configuration. 
+pub(super) fn global_registry() -> &'static Arc { + set_global_registry(default_global_registry) + .or_else(|err| { + // SAFETY: we only create a shared reference to `THE_REGISTRY` after the `call_once` + // that initializes it, and there will be no more mutable accesses at all. + debug_assert!(THE_REGISTRY_SET.is_completed()); + let the_registry = unsafe { &*ptr::addr_of!(THE_REGISTRY) }; + the_registry.as_ref().ok_or(err) + }) + .expect("The global thread pool has not been initialized.") +} + +/// Starts the worker threads (if that has not already happened) with +/// the given builder. +pub(super) fn init_global_registry( + builder: ThreadPoolBuilder, +) -> Result<&'static Arc, ThreadPoolBuildError> +where + S: ThreadSpawn, +{ + set_global_registry(|| Registry::new(builder)) +} + +/// Starts the worker threads (if that has not already happened) +/// by creating a registry with the given callback. +fn set_global_registry(registry: F) -> Result<&'static Arc, ThreadPoolBuildError> +where + F: FnOnce() -> Result, ThreadPoolBuildError>, +{ + let mut result = Err(ThreadPoolBuildError::new(ErrorKind::GlobalPoolAlreadyInitialized)); + + THE_REGISTRY_SET.call_once(|| { + result = registry().map(|registry: Arc| { + // SAFETY: this is the only mutable access to `THE_REGISTRY`, thanks to `Once`, and + // `global_registry()` only takes a shared reference **after** this `call_once`. + unsafe { + ptr::addr_of_mut!(THE_REGISTRY).write(Some(registry)); + (*ptr::addr_of!(THE_REGISTRY)).as_ref().unwrap_unchecked() + } + }) + }); + + result +} + +fn default_global_registry() -> Result, ThreadPoolBuildError> { + let result = Registry::new(ThreadPoolBuilder::new()); + + // If we're running in an environment that doesn't support threads at all, we can fall back to + // using the current thread alone. This is crude, and probably won't work for non-blocking + // calls like `spawn` or `broadcast_spawn`, but a lot of stuff does work fine. + // + // Notably, this allows current WebAssembly targets to work even though their threading support + // is stubbed out, and we won't have to change anything if they do add real threading. + let unsupported = matches!(&result, Err(e) if e.is_unsupported()); + if unsupported && WorkerThread::current().is_null() { + let builder = ThreadPoolBuilder::new().num_threads(1).spawn_handler(|thread| { + // Rather than starting a new thread, we're just taking over the current thread + // *without* running the main loop, so we can still return from here. + // The WorkerThread is leaked, but we never shutdown the global pool anyway. + let worker_thread = Box::leak(Box::new(WorkerThread::from(thread))); + let registry = &*worker_thread.registry; + let index = worker_thread.index; + + unsafe { + WorkerThread::set_current(worker_thread); + + // let registry know we are ready to do work + Latch::set(®istry.thread_infos[index].primed); + } + + Ok(()) + }); + + let fallback_result = Registry::new(builder); + if fallback_result.is_ok() { + return fallback_result; + } + } + + result +} + +struct Terminator<'a>(&'a Arc); + +impl<'a> Drop for Terminator<'a> { + fn drop(&mut self) { + self.0.terminate() + } +} + +impl Registry { + pub(super) fn new( + mut builder: ThreadPoolBuilder, + ) -> Result, ThreadPoolBuildError> + where + S: ThreadSpawn, + { + // Soft-limit the number of threads that we can actually support. 
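`set_global_registry` above guards a `static mut Option<Arc<Registry>>` with a `Once`, writing through `ptr::addr_of_mut!` so that every later read is a shared reference created strictly after the `call_once` completes. A reduced std-only sketch of that initialization pattern, assuming a toy `Config` type in place of the registry:

```rust
use std::ptr;
use std::sync::{Arc, Once};

struct Config {
    threads: usize,
}

static mut THE_CONFIG: Option<Arc<Config>> = None;
static THE_CONFIG_SET: Once = Once::new();

fn global_config() -> &'static Arc<Config> {
    THE_CONFIG_SET.call_once(|| {
        // SAFETY: `Once` guarantees this is the only mutable access, and every
        // shared reference below is created only after this write completes.
        unsafe {
            ptr::addr_of_mut!(THE_CONFIG).write(Some(Arc::new(Config { threads: 4 })));
        }
    });
    // SAFETY: initialization is complete and the static is never mutated again.
    unsafe { (*ptr::addr_of!(THE_CONFIG)).as_ref().unwrap() }
}

fn main() {
    println!("threads = {}", global_config().threads);
}
```

The raw-pointer accesses mirror the real code's choice to avoid taking references to the `static mut` directly, which keeps the pattern valid under the stricter `static_mut_refs` rules.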
+ let n_threads = Ord::min(builder.get_num_threads(), crate::max_num_threads()); + + let breadth_first = builder.get_breadth_first(); + + let (workers, stealers): (Vec<_>, Vec<_>) = (0..n_threads) + .map(|_| { + let worker = if breadth_first { Worker::new_fifo() } else { Worker::new_lifo() }; + + let stealer = worker.stealer(); + (worker, stealer) + }) + .unzip(); + + let (broadcasts, broadcast_stealers): (Vec<_>, Vec<_>) = (0..n_threads) + .map(|_| { + let worker = Worker::new_fifo(); + let stealer = worker.stealer(); + (worker, stealer) + }) + .unzip(); + + let registry = Arc::new(Registry { + thread_infos: stealers.into_iter().map(ThreadInfo::new).collect(), + sleep: Sleep::new(n_threads), + injected_jobs: Injector::new(), + broadcasts: Mutex::new(broadcasts), + terminate_count: AtomicUsize::new(1), + panic_handler: builder.take_panic_handler(), + deadlock_handler: builder.take_deadlock_handler(), + start_handler: builder.take_start_handler(), + exit_handler: builder.take_exit_handler(), + acquire_thread_handler: builder.take_acquire_thread_handler(), + release_thread_handler: builder.take_release_thread_handler(), + }); + + // If we return early or panic, make sure to terminate existing threads. + let t1000 = Terminator(®istry); + + for (index, (worker, stealer)) in workers.into_iter().zip(broadcast_stealers).enumerate() { + let thread = ThreadBuilder { + name: builder.get_thread_name(index), + stack_size: builder.get_stack_size(), + registry: Arc::clone(®istry), + worker, + stealer, + index, + }; + if let Err(e) = builder.get_spawn_handler().spawn(thread) { + return Err(ThreadPoolBuildError::new(ErrorKind::IOError(e))); + } + } + + // Returning normally now, without termination. + mem::forget(t1000); + + Ok(registry) + } + + pub fn current() -> Arc { + unsafe { + let worker_thread = WorkerThread::current(); + let registry = if worker_thread.is_null() { + global_registry() + } else { + &(*worker_thread).registry + }; + Arc::clone(registry) + } + } + + /// Returns the number of threads in the current registry. This + /// is better than `Registry::current().num_threads()` because it + /// avoids incrementing the `Arc`. + pub(super) fn current_num_threads() -> usize { + unsafe { + let worker_thread = WorkerThread::current(); + if worker_thread.is_null() { + global_registry().num_threads() + } else { + (*worker_thread).registry.num_threads() + } + } + } + + /// Returns the current `WorkerThread` if it's part of this `Registry`. + pub(super) fn current_thread(&self) -> Option<&WorkerThread> { + unsafe { + let worker = WorkerThread::current().as_ref()?; + if worker.registry().id() == self.id() { Some(worker) } else { None } + } + } + + /// Returns an opaque identifier for this registry. + pub(super) fn id(&self) -> RegistryId { + // We can rely on `self` not to change since we only ever create + // registries that are boxed up in an `Arc` (see `new()` above). + RegistryId { addr: self as *const Self as usize } + } + + pub(super) fn num_threads(&self) -> usize { + self.thread_infos.len() + } + + pub(super) fn catch_unwind(&self, f: impl FnOnce()) { + if let Err(err) = unwind::halt_unwinding(f) { + // If there is no handler, or if that handler itself panics, then we abort. + let abort_guard = unwind::AbortIfPanic; + if let Some(ref handler) = self.panic_handler { + handler(err); + mem::forget(abort_guard); + } + } + } + + /// Waits for the worker threads to get up and running. 
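`Registry::new` above builds one `crossbeam_deque::Worker`/`Stealer` pair per thread (LIFO by default, FIFO when `breadth_first` is set): the owning worker pushes and pops its own jobs, while other threads steal from the opposite end. A minimal sketch of how the two halves interact, assuming the `crossbeam-deque` crate this module already depends on:

```rust
use std::thread;

use crossbeam_deque::{Steal, Worker};

fn main() {
    // The owner pushes onto (and would normally pop from) its own deque...
    let worker: Worker<u32> = Worker::new_lifo();
    let stealer = worker.stealer();
    for job in 0..8 {
        worker.push(job);
    }

    // ...while other threads steal from the other end via the Stealer handle.
    let thief = thread::spawn(move || {
        let mut stolen = Vec::new();
        loop {
            match stealer.steal() {
                Steal::Success(job) => stolen.push(job),
                Steal::Empty => break,
                Steal::Retry => continue, // lost a race; try again
            }
        }
        stolen
    });

    let stolen = thief.join().unwrap();
    println!("thief stole {} of 8 jobs", stolen.len());
}
```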
This is + /// meant to be used for benchmarking purposes, primarily, so that + /// you can get more consistent numbers by having everything + /// "ready to go". + pub(super) fn wait_until_primed(&self) { + for info in &self.thread_infos { + info.primed.wait(); + } + } + + /// Waits for the worker threads to stop. This is used for testing + /// -- so we can check that termination actually works. + pub(super) fn wait_until_stopped(&self) { + self.release_thread(); + for info in &self.thread_infos { + info.stopped.wait(); + } + self.acquire_thread(); + } + + pub(crate) fn acquire_thread(&self) { + if let Some(ref acquire_thread_handler) = self.acquire_thread_handler { + acquire_thread_handler(); + } + } + + pub(crate) fn release_thread(&self) { + if let Some(ref release_thread_handler) = self.release_thread_handler { + release_thread_handler(); + } + } + + /// //////////////////////////////////////////////////////////////////////// + /// MAIN LOOP + /// + /// So long as all of the worker threads are hanging out in their + /// top-level loop, there is no work to be done. + + /// Push a job into the given `registry`. If we are running on a + /// worker thread for the registry, this will push onto the + /// deque. Else, it will inject from the outside (which is slower). + pub(super) fn inject_or_push(&self, job_ref: JobRef) { + let worker_thread = WorkerThread::current(); + unsafe { + if !worker_thread.is_null() && (*worker_thread).registry().id() == self.id() { + (*worker_thread).push(job_ref); + } else { + self.inject(job_ref); + } + } + } + + /// Push a job into the "external jobs" queue; it will be taken by + /// whatever worker has nothing to do. Use this if you know that + /// you are not on a worker of this registry. + pub(super) fn inject(&self, injected_job: JobRef) { + // It should not be possible for `state.terminate` to be true + // here. It is only set to true when the user creates (and + // drops) a `ThreadPool`; and, in that case, they cannot be + // calling `inject()` later, since they dropped their + // `ThreadPool`. + debug_assert_ne!( + self.terminate_count.load(Ordering::Acquire), + 0, + "inject() sees state.terminate as true" + ); + + let queue_was_empty = self.injected_jobs.is_empty(); + + self.injected_jobs.push(injected_job); + self.sleep.new_injected_jobs(1, queue_was_empty); + } + + pub(crate) fn has_injected_job(&self) -> bool { + !self.injected_jobs.is_empty() + } + + fn pop_injected_job(&self) -> Option { + loop { + match self.injected_jobs.steal() { + Steal::Success(job) => return Some(job), + Steal::Empty => return None, + Steal::Retry => {} + } + } + } + + /// Push a job into each thread's own "external jobs" queue; it will be + /// executed only on that thread, when it has nothing else to do locally, + /// before it tries to steal other work. + /// + /// **Panics** if not given exactly as many jobs as there are threads. + pub(super) fn inject_broadcast(&self, injected_jobs: impl ExactSizeIterator) { + assert_eq!(self.num_threads(), injected_jobs.len()); + { + let broadcasts = self.broadcasts.lock().unwrap(); + + // It should not be possible for `state.terminate` to be true + // here. It is only set to true when the user creates (and + // drops) a `ThreadPool`; and, in that case, they cannot be + // calling `inject_broadcast()` later, since they dropped their + // `ThreadPool`. 
+ debug_assert_ne!( + self.terminate_count.load(Ordering::Acquire), + 0, + "inject_broadcast() sees state.terminate as true" + ); + + assert_eq!(broadcasts.len(), injected_jobs.len()); + for (worker, job_ref) in broadcasts.iter().zip(injected_jobs) { + worker.push(job_ref); + } + } + for i in 0..self.num_threads() { + self.sleep.notify_worker_latch_is_set(i); + } + } + + /// If already in a worker-thread of this registry, just execute `op`. + /// Otherwise, inject `op` in this thread-pool. Either way, block until `op` + /// completes and return its return value. If `op` panics, that panic will + /// be propagated as well. The second argument indicates `true` if injection + /// was performed, `false` if executed directly. + pub(super) fn in_worker(&self, op: OP) -> R + where + OP: FnOnce(&WorkerThread, bool) -> R + Send, + R: Send, + { + unsafe { + let worker_thread = WorkerThread::current(); + if worker_thread.is_null() { + self.in_worker_cold(op) + } else if (*worker_thread).registry().id() != self.id() { + self.in_worker_cross(&*worker_thread, op) + } else { + // Perfectly valid to give them a `&T`: this is the + // current thread, so we know the data structure won't be + // invalidated until we return. + op(&*worker_thread, false) + } + } + } + + #[cold] + unsafe fn in_worker_cold(&self, op: OP) -> R + where + OP: FnOnce(&WorkerThread, bool) -> R + Send, + R: Send, + { + thread_local!(static LOCK_LATCH: LockLatch = LockLatch::new()); + + LOCK_LATCH.with(|l| { + // This thread isn't a member of *any* thread pool, so just block. + debug_assert!(WorkerThread::current().is_null()); + let job = StackJob::new( + Tlv::null(), + |injected| { + let worker_thread = WorkerThread::current(); + assert!(injected && !worker_thread.is_null()); + op(unsafe { &*worker_thread }, true) + }, + LatchRef::new(l), + ); + self.inject(unsafe { job.as_job_ref() }); + self.release_thread(); + job.latch.wait_and_reset(); // Make sure we can use the same latch again next time. + self.acquire_thread(); + + unsafe { job.into_result() } + }) + } + + #[cold] + unsafe fn in_worker_cross(&self, current_thread: &WorkerThread, op: OP) -> R + where + OP: FnOnce(&WorkerThread, bool) -> R + Send, + R: Send, + { + // This thread is a member of a different pool, so let it process + // other work while waiting for this `op` to complete. + debug_assert!(current_thread.registry().id() != self.id()); + let latch = SpinLatch::cross(current_thread); + let job = StackJob::new( + Tlv::null(), + |injected| { + let worker_thread = WorkerThread::current(); + assert!(injected && !worker_thread.is_null()); + op(unsafe { &*worker_thread }, true) + }, + latch, + ); + self.inject(unsafe { job.as_job_ref() }); + unsafe { current_thread.wait_until(&job.latch) }; + unsafe { job.into_result() } + } + + /// Increments the terminate counter. This increment should be + /// balanced by a call to `terminate`, which will decrement. This + /// is used when spawning asynchronous work, which needs to + /// prevent the registry from terminating so long as it is active. + /// + /// Note that blocking functions such as `join` and `scope` do not + /// need to concern themselves with this fn; their context is + /// responsible for ensuring the current thread-pool will not + /// terminate until they return. + /// + /// The global thread-pool always has an outstanding reference + /// (the initial one). 
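`in_worker_cold` above blocks a thread that belongs to no pool on a `LockLatch` until its injected job signals completion, and then resets the latch for reuse. A rough std-only equivalent of such a latch built from `Mutex` plus `Condvar`; this is a sketch of the idea, not the crate's actual `LockLatch` type:

```rust
use std::sync::{Arc, Condvar, Mutex};
use std::thread;

/// A one-shot latch: `wait()` blocks until some other thread calls `set()`.
struct LockLatchSketch {
    done: Mutex<bool>,
    cond: Condvar,
}

impl LockLatchSketch {
    fn new() -> Self {
        Self { done: Mutex::new(false), cond: Condvar::new() }
    }

    fn set(&self) {
        *self.done.lock().unwrap() = true;
        self.cond.notify_all();
    }

    fn wait(&self) {
        let mut done = self.done.lock().unwrap();
        while !*done {
            done = self.cond.wait(done).unwrap();
        }
    }
}

fn main() {
    let latch = Arc::new(LockLatchSketch::new());
    let signal = Arc::clone(&latch);

    // Stand-in for the injected job running on a pool worker.
    thread::spawn(move || signal.set());

    // Stand-in for the outside caller blocking in `in_worker_cold`.
    latch.wait();
    println!("injected job completed");
}
```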
Custom thread-pools have one outstanding + /// reference that is dropped when the `ThreadPool` is dropped: + /// since installing the thread-pool blocks until any joins/scopes + /// complete, this ensures that joins/scopes are covered. + /// + /// The exception is `::spawn()`, which can create a job outside + /// of any blocking scope. In that case, the job itself holds a + /// terminate count and is responsible for invoking `terminate()` + /// when finished. + pub(super) fn increment_terminate_count(&self) { + let previous = self.terminate_count.fetch_add(1, Ordering::AcqRel); + debug_assert!(previous != 0, "registry ref count incremented from zero"); + assert!(previous != usize::MAX, "overflow in registry ref count"); + } + + /// Signals that the thread-pool which owns this registry has been + /// dropped. The worker threads will gradually terminate, once any + /// extant work is completed. + pub(super) fn terminate(&self) { + if self.terminate_count.fetch_sub(1, Ordering::AcqRel) == 1 { + for (i, thread_info) in self.thread_infos.iter().enumerate() { + unsafe { OnceLatch::set_and_tickle_one(&thread_info.terminate, self, i) }; + } + } + } + + /// Notify the worker that the latch they are sleeping on has been "set". + pub(super) fn notify_worker_latch_is_set(&self, target_worker_index: usize) { + self.sleep.notify_worker_latch_is_set(target_worker_index); + } +} + +/// Mark a Rayon worker thread as blocked. This triggers the deadlock handler +/// if no other worker thread is active +#[inline] +pub fn mark_blocked() { + let worker_thread = WorkerThread::current(); + assert!(!worker_thread.is_null()); + unsafe { + let registry = &(*worker_thread).registry; + registry.sleep.mark_blocked(®istry.deadlock_handler) + } +} + +/// Mark a previously blocked Rayon worker thread as unblocked +#[inline] +pub fn mark_unblocked(registry: &Registry) { + registry.sleep.mark_unblocked() +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub(super) struct RegistryId { + addr: usize, +} + +struct ThreadInfo { + /// Latch set once thread has started and we are entering into the + /// main loop. Used to wait for worker threads to become primed, + /// primarily of interest for benchmarking. + primed: LockLatch, + + /// Latch is set once worker thread has completed. Used to wait + /// until workers have stopped; only used for tests. + stopped: LockLatch, + + /// The latch used to signal that terminated has been requested. + /// This latch is *set* by the `terminate` method on the + /// `Registry`, once the registry's main "terminate" counter + /// reaches zero. + terminate: OnceLatch, + + /// the "stealer" half of the worker's deque + stealer: Stealer, +} + +impl ThreadInfo { + fn new(stealer: Stealer) -> ThreadInfo { + ThreadInfo { + primed: LockLatch::new(), + stopped: LockLatch::new(), + terminate: OnceLatch::new(), + stealer, + } + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// WorkerThread identifiers + +pub(super) struct WorkerThread { + /// the "worker" half of our local deque + worker: Worker, + + /// the "stealer" half of the worker's broadcast deque + stealer: Stealer, + + /// local queue used for `spawn_fifo` indirection + fifo: JobFifo, + + pub(crate) index: usize, + + /// A weak random number generator. + rng: XorShift64Star, + + pub(crate) registry: Arc, +} + +// This is a bit sketchy, but basically: the WorkerThread is +// allocated on the stack of the worker on entry and stored into this +// thread local variable. 
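The `terminate_count` handling above is effectively a hand-rolled reference count: `increment_terminate_count` bumps it for outstanding asynchronous work, and `terminate` performs the actual shutdown only when the count drops back to zero. A compact sketch of that counting discipline; the `PoolShutdown` name and the `println!` placeholder are illustrative only:

```rust
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};

struct PoolShutdown {
    // Starts at 1: the owning handle itself keeps the pool alive.
    terminate_count: AtomicUsize,
}

impl PoolShutdown {
    fn increment(&self) {
        let previous = self.terminate_count.fetch_add(1, Ordering::AcqRel);
        debug_assert!(previous != 0, "incremented after the pool already terminated");
    }

    fn decrement(&self) {
        // Whoever drops the count to zero is responsible for the real shutdown,
        // i.e. setting each worker's terminate latch in the actual registry.
        if self.terminate_count.fetch_sub(1, Ordering::AcqRel) == 1 {
            println!("last reference gone: signal worker termination here");
        }
    }
}

fn main() {
    let pool = Arc::new(PoolShutdown { terminate_count: AtomicUsize::new(1) });

    pool.increment(); // e.g. a detached `spawn()` keeps the pool alive...
    pool.decrement(); // ...and releases it when the job finishes.

    pool.decrement(); // dropping the owning handle: count hits zero, shutdown runs.
}
```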
So it will remain valid at least until the +// worker is fully unwound. Using an unsafe pointer avoids the need +// for a RefCell etc. +thread_local! { + static WORKER_THREAD_STATE: Cell<*const WorkerThread> = const { Cell::new(ptr::null()) }; +} + +impl From for WorkerThread { + fn from(thread: ThreadBuilder) -> Self { + Self { + worker: thread.worker, + stealer: thread.stealer, + fifo: JobFifo::new(), + index: thread.index, + rng: XorShift64Star::new(), + registry: thread.registry, + } + } +} + +impl Drop for WorkerThread { + fn drop(&mut self) { + // Undo `set_current` + WORKER_THREAD_STATE.with(|t| { + assert!(t.get().eq(&(self as *const _))); + t.set(ptr::null()); + }); + } +} + +impl WorkerThread { + /// Gets the `WorkerThread` index for the current thread; returns + /// NULL if this is not a worker thread. This pointer is valid + /// anywhere on the current thread. + #[inline] + pub(super) fn current() -> *const WorkerThread { + WORKER_THREAD_STATE.with(Cell::get) + } + + /// Sets `self` as the worker thread index for the current thread. + /// This is done during worker thread startup. + unsafe fn set_current(thread: *const WorkerThread) { + WORKER_THREAD_STATE.with(|t| { + assert!(t.get().is_null()); + t.set(thread); + }); + } + + /// Returns the registry that owns this worker thread. + #[inline] + pub(super) fn registry(&self) -> &Arc { + &self.registry + } + + /// Our index amongst the worker threads (ranges from `0..self.num_threads()`). + #[inline] + pub(super) fn index(&self) -> usize { + self.index + } + + #[inline] + pub(super) unsafe fn push(&self, job: JobRef) { + let queue_was_empty = self.worker.is_empty(); + self.worker.push(job); + self.registry.sleep.new_internal_jobs(1, queue_was_empty); + } + + #[inline] + pub(super) unsafe fn push_fifo(&self, job: JobRef) { + unsafe { self.push(self.fifo.push(job)) }; + } + + #[inline] + pub(super) fn local_deque_is_empty(&self) -> bool { + self.worker.is_empty() + } + + /// Attempts to obtain a "local" job -- typically this means + /// popping from the top of the stack, though if we are configured + /// for breadth-first execution, it would mean dequeuing from the + /// bottom. + #[inline] + pub(super) fn take_local_job(&self) -> Option { + let popped_job = self.worker.pop(); + + if popped_job.is_some() { + return popped_job; + } + + loop { + match self.stealer.steal() { + Steal::Success(job) => return Some(job), + Steal::Empty => return None, + Steal::Retry => {} + } + } + } + + pub(super) fn has_injected_job(&self) -> bool { + !self.stealer.is_empty() || self.registry.has_injected_job() + } + + /// Wait until the latch is set. Try to keep busy by popping and + /// stealing tasks as necessary. + #[inline] + pub(super) unsafe fn wait_until(&self, latch: &L) { + let latch = latch.as_core_latch(); + if !latch.probe() { + unsafe { self.wait_until_cold(latch) }; + } + } + + #[cold] + unsafe fn wait_until_cold(&self, latch: &CoreLatch) { + // the code below should swallow all panics and hence never + // unwind; but if something does wrong, we want to abort, + // because otherwise other code in rayon may assume that the + // latch has been signaled, and that can lead to random memory + // accesses, which would be *very bad* + let abort_guard = unwind::AbortIfPanic; + + 'outer: while !latch.probe() { + // Check for local work *before* we start marking ourself idle, + // especially to avoid modifying shared sleep state. 
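The `WORKER_THREAD_STATE` cell above stores a raw pointer to a `WorkerThread` that lives on the worker's own stack for the duration of `main_loop`, which avoids `RefCell` bookkeeping on this hot path. A std-only sketch of the same thread-local raw-pointer pattern, using a toy `WorkerState` struct as a stand-in for the real type:

```rust
use std::cell::Cell;
use std::ptr;

struct WorkerState {
    index: usize,
}

thread_local! {
    static CURRENT_WORKER: Cell<*const WorkerState> = const { Cell::new(ptr::null()) };
}

fn with_worker(state: &WorkerState, f: impl FnOnce()) {
    // Publish a pointer to the stack-allocated state for the duration of `f`.
    CURRENT_WORKER.with(|c| c.set(state as *const WorkerState));
    f();
    CURRENT_WORKER.with(|c| c.set(ptr::null()));
}

fn current_index() -> Option<usize> {
    let ptr = CURRENT_WORKER.with(Cell::get);
    // SAFETY: only valid while `with_worker` is still on the stack, which is the
    // same invariant the registry maintains for its worker threads.
    unsafe { ptr.as_ref().map(|w| w.index) }
}

fn main() {
    let state = WorkerState { index: 3 };
    with_worker(&state, || {
        assert_eq!(current_index(), Some(3));
    });
    assert_eq!(current_index(), None);
}
```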
+ if let Some(job) = self.take_local_job() { + unsafe { self.execute(job) }; + continue; + } + + let mut idle_state = self.registry.sleep.start_looking(self.index); + while !latch.probe() { + if let Some(job) = self.find_work() { + self.registry.sleep.work_found(); + unsafe { self.execute(job) }; + // The job might have injected local work, so go back to the outer loop. + continue 'outer; + } else { + self.registry.sleep.no_work_found(&mut idle_state, latch, &self) + } + } + + // If we were sleepy, we are not anymore. We "found work" -- + // whatever the surrounding thread was doing before it had to wait. + self.registry.sleep.work_found(); + break; + } + + mem::forget(abort_guard); // successful execution, do not abort + } + + unsafe fn wait_until_out_of_work(&self) { + debug_assert_eq!(self as *const _, WorkerThread::current()); + let registry = &*self.registry; + let index = self.index; + + registry.acquire_thread(); + unsafe { self.wait_until(®istry.thread_infos[index].terminate) }; + + // Should not be any work left in our queue. + debug_assert!(self.take_local_job().is_none()); + + // Let registry know we are done + unsafe { Latch::set(®istry.thread_infos[index].stopped) }; + } + + fn find_work(&self) -> Option { + // Try to find some work to do. We give preference first + // to things in our local deque, then in other workers + // deques, and finally to injected jobs from the + // outside. The idea is to finish what we started before + // we take on something new. + self.take_local_job().or_else(|| self.steal()).or_else(|| self.registry.pop_injected_job()) + } + + pub(super) fn yield_now(&self) -> Yield { + match self.find_work() { + Some(job) => unsafe { + self.execute(job); + Yield::Executed + }, + None => Yield::Idle, + } + } + + pub(super) fn yield_local(&self) -> Yield { + match self.take_local_job() { + Some(job) => unsafe { + self.execute(job); + Yield::Executed + }, + None => Yield::Idle, + } + } + + #[inline] + pub(super) unsafe fn execute(&self, job: JobRef) { + unsafe { job.execute() }; + } + + /// Try to steal a single job and return it. + /// + /// This should only be done as a last resort, when there is no + /// local work to do. + fn steal(&self) -> Option { + // we only steal when we don't have any work to do locally + debug_assert!(self.local_deque_is_empty()); + + // otherwise, try to steal + let thread_infos = &self.registry.thread_infos.as_slice(); + let num_threads = thread_infos.len(); + if num_threads <= 1 { + return None; + } + + loop { + let mut retry = false; + let start = self.rng.next_usize(num_threads); + let job = (start..num_threads) + .chain(0..start) + .filter(move |&i| i != self.index) + .find_map(|victim_index| { + let victim = &thread_infos[victim_index]; + match victim.stealer.steal() { + Steal::Success(job) => Some(job), + Steal::Empty => None, + Steal::Retry => { + retry = true; + None + } + } + }); + if job.is_some() || !retry { + return job; + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// + +unsafe fn main_loop(thread: ThreadBuilder) { + let worker_thread = &WorkerThread::from(thread); + unsafe { WorkerThread::set_current(worker_thread) }; + let registry = &*worker_thread.registry; + let index = worker_thread.index; + + // let registry know we are ready to do work + unsafe { Latch::set(®istry.thread_infos[index].primed) }; + + // Worker threads should not panic. If they do, just abort, as the + // internal state of the threadpool is corrupted. 
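The `steal` routine above probes victims starting at a random index so that idle workers do not all hammer thread 0 first, and it retries the whole pass whenever any victim reports `Steal::Retry`. A simplified std-only sketch of just the victim-ordering part, with plain `Mutex<Vec<_>>` queues standing in for real deques and no retry handling:

```rust
use std::sync::Mutex;

fn steal_one(victims: &[Mutex<Vec<u32>>], me: usize, start: usize) -> Option<u32> {
    let n = victims.len();
    // Visit every other worker exactly once, beginning at the random `start`.
    (start..n)
        .chain(0..start)
        .filter(|&i| i != me)
        .find_map(|i| victims[i].lock().unwrap().pop())
}

fn main() {
    let victims: Vec<Mutex<Vec<u32>>> =
        vec![Mutex::new(vec![]), Mutex::new(vec![10, 11]), Mutex::new(vec![20])];

    // Worker 0 is idle; pretend its RNG chose to start probing at index 2.
    let job = steal_one(&victims, 0, 2);
    assert_eq!(job, Some(20));
    println!("stole job {job:?}");
}
```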
Note that if + // **user code** panics, we should catch that and redirect. + let abort_guard = unwind::AbortIfPanic; + + // Inform a user callback that we started a thread. + if let Some(ref handler) = registry.start_handler { + registry.catch_unwind(|| handler(index)); + } + + unsafe { worker_thread.wait_until_out_of_work() }; + + // Normal termination, do not abort. + mem::forget(abort_guard); + + // Inform a user callback that we exited a thread. + if let Some(ref handler) = registry.exit_handler { + registry.catch_unwind(|| handler(index)); + // We're already exiting the thread, there's nothing else to do. + } + + registry.release_thread(); +} + +/// If already in a worker-thread, just execute `op`. Otherwise, +/// execute `op` in the default thread-pool. Either way, block until +/// `op` completes and return its return value. If `op` panics, that +/// panic will be propagated as well. The second argument indicates +/// `true` if injection was performed, `false` if executed directly. +pub(super) fn in_worker(op: OP) -> R +where + OP: FnOnce(&WorkerThread, bool) -> R + Send, + R: Send, +{ + unsafe { + let owner_thread = WorkerThread::current(); + if !owner_thread.is_null() { + // Perfectly valid to give them a `&T`: this is the + // current thread, so we know the data structure won't be + // invalidated until we return. + op(&*owner_thread, false) + } else { + global_registry().in_worker(op) + } + } +} + +/// [xorshift*] is a fast pseudorandom number generator which will +/// even tolerate weak seeding, as long as it's not zero. +/// +/// [xorshift*]: https://en.wikipedia.org/wiki/Xorshift#xorshift* +struct XorShift64Star { + state: Cell, +} + +impl XorShift64Star { + fn new() -> Self { + // Any non-zero seed will do -- this uses the hash of a global counter. + let mut seed = 0; + while seed == 0 { + let mut hasher = DefaultHasher::new(); + static COUNTER: AtomicUsize = AtomicUsize::new(0); + hasher.write_usize(COUNTER.fetch_add(1, Ordering::Relaxed)); + seed = hasher.finish(); + } + + XorShift64Star { state: Cell::new(seed) } + } + + fn next(&self) -> u64 { + let mut x = self.state.get(); + debug_assert_ne!(x, 0); + x ^= x >> 12; + x ^= x << 25; + x ^= x >> 27; + self.state.set(x); + x.wrapping_mul(0x2545_f491_4f6c_dd1d) + } + + /// Return a value from `0..n`. + fn next_usize(&self, n: usize) -> usize { + (self.next() % n as u64) as usize + } +} diff --git a/compiler/rustc_thread_pool/src/scope/mod.rs b/compiler/rustc_thread_pool/src/scope/mod.rs new file mode 100644 index 0000000000000..55e58b3509d7d --- /dev/null +++ b/compiler/rustc_thread_pool/src/scope/mod.rs @@ -0,0 +1,783 @@ +//! Methods for custom fork-join scopes, created by the [`scope()`] +//! and [`in_place_scope()`] functions. These are a more flexible alternative to [`join()`]. +//! +//! [`scope()`]: fn.scope.html +//! [`in_place_scope()`]: fn.in_place_scope.html +//! [`join()`]: ../join/join.fn.html + +use std::any::Any; +use std::marker::PhantomData; +use std::mem::ManuallyDrop; +use std::sync::Arc; +use std::sync::atomic::{AtomicPtr, Ordering}; +use std::{fmt, ptr}; + +use crate::broadcast::BroadcastContext; +use crate::job::{ArcJob, HeapJob, JobFifo, JobRef}; +use crate::latch::{CountLatch, Latch}; +use crate::registry::{Registry, WorkerThread, global_registry, in_worker}; +use crate::tlv::{self, Tlv}; +use crate::unwind; + +#[cfg(test)] +mod tests; + +/// Represents a fork-join scope which can be used to spawn any number of tasks. +/// See [`scope()`] for more information. 
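The `XorShift64Star` generator defined above is used only for victim selection, so statistical quality barely matters; what does matter is that the state never collapses to zero and that `next_usize` stays within the pool size. A small standalone check of those two properties, restating the algorithm here rather than importing it (the seed constant is arbitrary, any non-zero value works):

```rust
use std::cell::Cell;

struct XorShift64Star {
    state: Cell<u64>,
}

impl XorShift64Star {
    fn new(seed: u64) -> Self {
        assert_ne!(seed, 0, "xorshift* requires a non-zero seed");
        XorShift64Star { state: Cell::new(seed) }
    }

    fn next(&self) -> u64 {
        let mut x = self.state.get();
        x ^= x >> 12;
        x ^= x << 25;
        x ^= x >> 27;
        self.state.set(x);
        x.wrapping_mul(0x2545_f491_4f6c_dd1d)
    }

    /// Return a value from `0..n`.
    fn next_usize(&self, n: usize) -> usize {
        (self.next() % n as u64) as usize
    }
}

fn main() {
    let rng = XorShift64Star::new(0x9e37_79b9_7f4a_7c15);
    for _ in 0..1_000 {
        // A non-zero xorshift state never becomes zero...
        assert_ne!(rng.state.get(), 0);
        // ...and victim indices always fall inside the pool.
        assert!(rng.next_usize(16) < 16);
    }
    println!("xorshift* sanity checks passed");
}
```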
+/// +///[`scope()`]: fn.scope.html +pub struct Scope<'scope> { + base: ScopeBase<'scope>, +} + +/// Represents a fork-join scope which can be used to spawn any number of tasks. +/// Those spawned from the same thread are prioritized in relative FIFO order. +/// See [`scope_fifo()`] for more information. +/// +///[`scope_fifo()`]: fn.scope_fifo.html +pub struct ScopeFifo<'scope> { + base: ScopeBase<'scope>, + fifos: Vec, +} + +struct ScopeBase<'scope> { + /// thread registry where `scope()` was executed or where `in_place_scope()` + /// should spawn jobs. + registry: Arc, + + /// if some job panicked, the error is stored here; it will be + /// propagated to the one who created the scope + panic: AtomicPtr>, + + /// latch to track job counts + job_completed_latch: CountLatch, + + /// You can think of a scope as containing a list of closures to execute, + /// all of which outlive `'scope`. They're not actually required to be + /// `Sync`, but it's still safe to let the `Scope` implement `Sync` because + /// the closures are only *moved* across threads to be executed. + #[allow(clippy::type_complexity)] + marker: PhantomData) + Send + Sync + 'scope>>, + + /// The TLV at the scope's creation. Used to set the TLV for spawned jobs. + tlv: Tlv, +} + +/// Creates a "fork-join" scope `s` and invokes the closure with a +/// reference to `s`. This closure can then spawn asynchronous tasks +/// into `s`. Those tasks may run asynchronously with respect to the +/// closure; they may themselves spawn additional tasks into `s`. When +/// the closure returns, it will block until all tasks that have been +/// spawned into `s` complete. +/// +/// `scope()` is a more flexible building block compared to `join()`, +/// since a loop can be used to spawn any number of tasks without +/// recursing. However, that flexibility comes at a performance price: +/// tasks spawned using `scope()` must be allocated onto the heap, +/// whereas `join()` can make exclusive use of the stack. **Prefer +/// `join()` (or, even better, parallel iterators) where possible.** +/// +/// # Example +/// +/// The Rayon `join()` function launches two closures and waits for them +/// to stop. One could implement `join()` using a scope like so, although +/// it would be less efficient than the real implementation: +/// +/// ```rust +/// # use rustc_thread_pool as rayon; +/// pub fn join(oper_a: A, oper_b: B) -> (RA, RB) +/// where A: FnOnce() -> RA + Send, +/// B: FnOnce() -> RB + Send, +/// RA: Send, +/// RB: Send, +/// { +/// let mut result_a: Option = None; +/// let mut result_b: Option = None; +/// rayon::scope(|s| { +/// s.spawn(|_| result_a = Some(oper_a())); +/// s.spawn(|_| result_b = Some(oper_b())); +/// }); +/// (result_a.unwrap(), result_b.unwrap()) +/// } +/// ``` +/// +/// # A note on threading +/// +/// The closure given to `scope()` executes in the Rayon thread-pool, +/// as do those given to `spawn()`. This means that you can't access +/// thread-local variables (well, you can, but they may have +/// unexpected values). +/// +/// # Task execution +/// +/// Task execution potentially starts as soon as `spawn()` is called. +/// The task will end sometime before `scope()` returns. Note that the +/// *closure* given to scope may return much earlier. 
In general +/// the lifetime of a scope created like `scope(body)` goes something like this: +/// +/// - Scope begins when `scope(body)` is called +/// - Scope body `body()` is invoked +/// - Scope tasks may be spawned +/// - Scope body returns +/// - Scope tasks execute, possibly spawning more tasks +/// - Once all tasks are done, scope ends and `scope()` returns +/// +/// To see how and when tasks are joined, consider this example: +/// +/// ```rust +/// # use rustc_thread_pool as rayon; +/// // point start +/// rayon::scope(|s| { +/// s.spawn(|s| { // task s.1 +/// s.spawn(|s| { // task s.1.1 +/// rayon::scope(|t| { +/// t.spawn(|_| ()); // task t.1 +/// t.spawn(|_| ()); // task t.2 +/// }); +/// }); +/// }); +/// s.spawn(|s| { // task s.2 +/// }); +/// // point mid +/// }); +/// // point end +/// ``` +/// +/// The various tasks that are run will execute roughly like so: +/// +/// ```notrust +/// | (start) +/// | +/// | (scope `s` created) +/// +-----------------------------------------------+ (task s.2) +/// +-------+ (task s.1) | +/// | | | +/// | +---+ (task s.1.1) | +/// | | | | +/// | | | (scope `t` created) | +/// | | +----------------+ (task t.2) | +/// | | +---+ (task t.1) | | +/// | (mid) | | | | | +/// : | + <-+------------+ (scope `t` ends) | +/// : | | | +/// |<------+---+-----------------------------------+ (scope `s` ends) +/// | +/// | (end) +/// ``` +/// +/// The point here is that everything spawned into scope `s` will +/// terminate (at latest) at the same point -- right before the +/// original call to `rayon::scope` returns. This includes new +/// subtasks created by other subtasks (e.g., task `s.1.1`). If a new +/// scope is created (such as `t`), the things spawned into that scope +/// will be joined before that scope returns, which in turn occurs +/// before the creating task (task `s.1.1` in this case) finishes. +/// +/// There is no guaranteed order of execution for spawns in a scope, +/// given that other threads may steal tasks at any time. However, they +/// are generally prioritized in a LIFO order on the thread from which +/// they were spawned. So in this example, absent any stealing, we can +/// expect `s.2` to execute before `s.1`, and `t.2` before `t.1`. Other +/// threads always steal from the other end of the deque, like FIFO +/// order. The idea is that "recent" tasks are most likely to be fresh +/// in the local CPU's cache, while other threads can steal older +/// "stale" tasks. For an alternate approach, consider +/// [`scope_fifo()`] instead. +/// +/// [`scope_fifo()`]: fn.scope_fifo.html +/// +/// # Accessing stack data +/// +/// In general, spawned tasks may access stack data in place that +/// outlives the scope itself. Other data must be fully owned by the +/// spawned task. +/// +/// ```rust +/// # use rustc_thread_pool as rayon; +/// let ok: Vec = vec![1, 2, 3]; +/// rayon::scope(|s| { +/// let bad: Vec = vec![4, 5, 6]; +/// s.spawn(|_| { +/// // We can access `ok` because outlives the scope `s`. +/// println!("ok: {:?}", ok); +/// +/// // If we just try to use `bad` here, the closure will borrow `bad` +/// // (because we are just printing it out, and that only requires a +/// // borrow), which will result in a compilation error. Read on +/// // for options. +/// // println!("bad: {:?}", bad); +/// }); +/// }); +/// ``` +/// +/// As the comments example above suggest, to reference `bad` we must +/// take ownership of it. One way to do this is to detach the closure +/// from the surrounding stack frame, using the `move` keyword. 
This +/// will cause it to take ownership of *all* the variables it touches, +/// in this case including both `ok` *and* `bad`: +/// +/// ```rust +/// # use rustc_thread_pool as rayon; +/// let ok: Vec = vec![1, 2, 3]; +/// rayon::scope(|s| { +/// let bad: Vec = vec![4, 5, 6]; +/// s.spawn(move |_| { +/// println!("ok: {:?}", ok); +/// println!("bad: {:?}", bad); +/// }); +/// +/// // That closure is fine, but now we can't use `ok` anywhere else, +/// // since it is owned by the previous task: +/// // s.spawn(|_| println!("ok: {:?}", ok)); +/// }); +/// ``` +/// +/// While this works, it could be a problem if we want to use `ok` elsewhere. +/// There are two choices. We can keep the closure as a `move` closure, but +/// instead of referencing the variable `ok`, we create a shadowed variable that +/// is a borrow of `ok` and capture *that*: +/// +/// ```rust +/// # use rustc_thread_pool as rayon; +/// let ok: Vec = vec![1, 2, 3]; +/// rayon::scope(|s| { +/// let bad: Vec = vec![4, 5, 6]; +/// let ok: &Vec = &ok; // shadow the original `ok` +/// s.spawn(move |_| { +/// println!("ok: {:?}", ok); // captures the shadowed version +/// println!("bad: {:?}", bad); +/// }); +/// +/// // Now we too can use the shadowed `ok`, since `&Vec` references +/// // can be shared freely. Note that we need a `move` closure here though, +/// // because otherwise we'd be trying to borrow the shadowed `ok`, +/// // and that doesn't outlive `scope`. +/// s.spawn(move |_| println!("ok: {:?}", ok)); +/// }); +/// ``` +/// +/// Another option is not to use the `move` keyword but instead to take ownership +/// of individual variables: +/// +/// ```rust +/// # use rustc_thread_pool as rayon; +/// let ok: Vec = vec![1, 2, 3]; +/// rayon::scope(|s| { +/// let bad: Vec = vec![4, 5, 6]; +/// s.spawn(|_| { +/// // Transfer ownership of `bad` into a local variable (also named `bad`). +/// // This will force the closure to take ownership of `bad` from the environment. +/// let bad = bad; +/// println!("ok: {:?}", ok); // `ok` is only borrowed. +/// println!("bad: {:?}", bad); // refers to our local variable, above. +/// }); +/// +/// s.spawn(|_| println!("ok: {:?}", ok)); // we too can borrow `ok` +/// }); +/// ``` +/// +/// # Panics +/// +/// If a panic occurs, either in the closure given to `scope()` or in +/// any of the spawned jobs, that panic will be propagated and the +/// call to `scope()` will panic. If multiple panics occurs, it is +/// non-deterministic which of their panic values will propagate. +/// Regardless, once a task is spawned using `scope.spawn()`, it will +/// execute, even if the spawning task should later panic. `scope()` +/// returns once all spawned jobs have completed, and any panics are +/// propagated at that point. +pub fn scope<'scope, OP, R>(op: OP) -> R +where + OP: FnOnce(&Scope<'scope>) -> R + Send, + R: Send, +{ + in_worker(|owner_thread, _| { + let scope = Scope::<'scope>::new(Some(owner_thread), None); + scope.base.complete(Some(owner_thread), || op(&scope)) + }) +} + +/// Creates a "fork-join" scope `s` with FIFO order, and invokes the +/// closure with a reference to `s`. This closure can then spawn +/// asynchronous tasks into `s`. Those tasks may run asynchronously with +/// respect to the closure; they may themselves spawn additional tasks +/// into `s`. When the closure returns, it will block until all tasks +/// that have been spawned into `s` complete. 
+/// +/// # Task execution +/// +/// Tasks in a `scope_fifo()` run similarly to [`scope()`], but there's a +/// difference in the order of execution. Consider a similar example: +/// +/// [`scope()`]: fn.scope.html +/// +/// ```rust +/// # use rustc_thread_pool as rayon; +/// // point start +/// rayon::scope_fifo(|s| { +/// s.spawn_fifo(|s| { // task s.1 +/// s.spawn_fifo(|s| { // task s.1.1 +/// rayon::scope_fifo(|t| { +/// t.spawn_fifo(|_| ()); // task t.1 +/// t.spawn_fifo(|_| ()); // task t.2 +/// }); +/// }); +/// }); +/// s.spawn_fifo(|s| { // task s.2 +/// }); +/// // point mid +/// }); +/// // point end +/// ``` +/// +/// The various tasks that are run will execute roughly like so: +/// +/// ```notrust +/// | (start) +/// | +/// | (FIFO scope `s` created) +/// +--------------------+ (task s.1) +/// +-------+ (task s.2) | +/// | | +---+ (task s.1.1) +/// | | | | +/// | | | | (FIFO scope `t` created) +/// | | | +----------------+ (task t.1) +/// | | | +---+ (task t.2) | +/// | (mid) | | | | | +/// : | | + <-+------------+ (scope `t` ends) +/// : | | | +/// |<------+------------+---+ (scope `s` ends) +/// | +/// | (end) +/// ``` +/// +/// Under `scope_fifo()`, the spawns are prioritized in a FIFO order on +/// the thread from which they were spawned, as opposed to `scope()`'s +/// LIFO. So in this example, we can expect `s.1` to execute before +/// `s.2`, and `t.1` before `t.2`. Other threads also steal tasks in +/// FIFO order, as usual. Overall, this has roughly the same order as +/// the now-deprecated [`breadth_first`] option, except the effect is +/// isolated to a particular scope. If spawns are intermingled from any +/// combination of `scope()` and `scope_fifo()`, or from different +/// threads, their order is only specified with respect to spawns in the +/// same scope and thread. +/// +/// For more details on this design, see Rayon [RFC #1]. +/// +/// [`breadth_first`]: struct.ThreadPoolBuilder.html#method.breadth_first +/// [RFC #1]: https://github.com/rayon-rs/rfcs/blob/master/accepted/rfc0001-scope-scheduling.md +/// +/// # Panics +/// +/// If a panic occurs, either in the closure given to `scope_fifo()` or +/// in any of the spawned jobs, that panic will be propagated and the +/// call to `scope_fifo()` will panic. If multiple panics occurs, it is +/// non-deterministic which of their panic values will propagate. +/// Regardless, once a task is spawned using `scope.spawn_fifo()`, it +/// will execute, even if the spawning task should later panic. +/// `scope_fifo()` returns once all spawned jobs have completed, and any +/// panics are propagated at that point. +pub fn scope_fifo<'scope, OP, R>(op: OP) -> R +where + OP: FnOnce(&ScopeFifo<'scope>) -> R + Send, + R: Send, +{ + in_worker(|owner_thread, _| { + let scope = ScopeFifo::<'scope>::new(Some(owner_thread), None); + scope.base.complete(Some(owner_thread), || op(&scope)) + }) +} + +/// Creates a "fork-join" scope `s` and invokes the closure with a +/// reference to `s`. This closure can then spawn asynchronous tasks +/// into `s`. Those tasks may run asynchronously with respect to the +/// closure; they may themselves spawn additional tasks into `s`. When +/// the closure returns, it will block until all tasks that have been +/// spawned into `s` complete. +/// +/// This is just like `scope()` except the closure runs on the same thread +/// that calls `in_place_scope()`. Only work that it spawns runs in the +/// thread pool. 
+/// +/// # Panics +/// +/// If a panic occurs, either in the closure given to `in_place_scope()` or in +/// any of the spawned jobs, that panic will be propagated and the +/// call to `in_place_scope()` will panic. If multiple panics occurs, it is +/// non-deterministic which of their panic values will propagate. +/// Regardless, once a task is spawned using `scope.spawn()`, it will +/// execute, even if the spawning task should later panic. `in_place_scope()` +/// returns once all spawned jobs have completed, and any panics are +/// propagated at that point. +pub fn in_place_scope<'scope, OP, R>(op: OP) -> R +where + OP: FnOnce(&Scope<'scope>) -> R, +{ + do_in_place_scope(None, op) +} + +pub(crate) fn do_in_place_scope<'scope, OP, R>(registry: Option<&Arc>, op: OP) -> R +where + OP: FnOnce(&Scope<'scope>) -> R, +{ + let thread = unsafe { WorkerThread::current().as_ref() }; + let scope = Scope::<'scope>::new(thread, registry); + scope.base.complete(thread, || op(&scope)) +} + +/// Creates a "fork-join" scope `s` with FIFO order, and invokes the +/// closure with a reference to `s`. This closure can then spawn +/// asynchronous tasks into `s`. Those tasks may run asynchronously with +/// respect to the closure; they may themselves spawn additional tasks +/// into `s`. When the closure returns, it will block until all tasks +/// that have been spawned into `s` complete. +/// +/// This is just like `scope_fifo()` except the closure runs on the same thread +/// that calls `in_place_scope_fifo()`. Only work that it spawns runs in the +/// thread pool. +/// +/// # Panics +/// +/// If a panic occurs, either in the closure given to `in_place_scope_fifo()` or in +/// any of the spawned jobs, that panic will be propagated and the +/// call to `in_place_scope_fifo()` will panic. If multiple panics occurs, it is +/// non-deterministic which of their panic values will propagate. +/// Regardless, once a task is spawned using `scope.spawn_fifo()`, it will +/// execute, even if the spawning task should later panic. `in_place_scope_fifo()` +/// returns once all spawned jobs have completed, and any panics are +/// propagated at that point. +pub fn in_place_scope_fifo<'scope, OP, R>(op: OP) -> R +where + OP: FnOnce(&ScopeFifo<'scope>) -> R, +{ + do_in_place_scope_fifo(None, op) +} + +pub(crate) fn do_in_place_scope_fifo<'scope, OP, R>(registry: Option<&Arc>, op: OP) -> R +where + OP: FnOnce(&ScopeFifo<'scope>) -> R, +{ + let thread = unsafe { WorkerThread::current().as_ref() }; + let scope = ScopeFifo::<'scope>::new(thread, registry); + scope.base.complete(thread, || op(&scope)) +} + +impl<'scope> Scope<'scope> { + fn new(owner: Option<&WorkerThread>, registry: Option<&Arc>) -> Self { + let base = ScopeBase::new(owner, registry); + Scope { base } + } + + /// Spawns a job into the fork-join scope `self`. This job will + /// execute sometime before the fork-join scope completes. The + /// job is specified as a closure, and this closure receives its + /// own reference to the scope `self` as argument. This can be + /// used to inject new jobs into `self`. + /// + /// # Returns + /// + /// Nothing. The spawned closures cannot pass back values to the + /// caller directly, though they can write to local variables on + /// the stack (if those variables outlive the scope) or + /// communicate through shared channels. + /// + /// (The intention is to eventually integrate with Rust futures to + /// support spawns of functions that compute a value.) 
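Unlike `scope()`, the documentation for `in_place_scope()` above carries no example, so the following is a hedged usage sketch in the style of the other doc examples. It assumes the crate re-exports `in_place_scope` at its root the way rayon-core does; the closure itself stays on the calling thread, and only the spawned tasks may migrate to pool workers:

```rust
use rustc_thread_pool as rayon;

fn main() {
    let mut partial_sums = vec![0u64; 4];

    // The closure runs right here on the calling thread...
    rayon::in_place_scope(|s| {
        for (chunk, slot) in partial_sums.iter_mut().enumerate() {
            // ...but each spawned task may run on a pool worker. The disjoint
            // `&mut u64` borrows outlive the scope, so they can be captured.
            s.spawn(move |_| {
                *slot = (0..1_000u64).map(|i| i + chunk as u64).sum();
            });
        }
    });

    // All spawned work is guaranteed to have finished once the scope returns.
    println!("total = {}", partial_sums.iter().sum::<u64>());
}
```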
+ /// + /// # Examples + /// + /// ```rust + /// # use rustc_thread_pool as rayon; + /// let mut value_a = None; + /// let mut value_b = None; + /// let mut value_c = None; + /// rayon::scope(|s| { + /// s.spawn(|s1| { + /// // ^ this is the same scope as `s`; this handle `s1` + /// // is intended for use by the spawned task, + /// // since scope handles cannot cross thread boundaries. + /// + /// value_a = Some(22); + /// + /// // the scope `s` will not end until all these tasks are done + /// s1.spawn(|_| { + /// value_b = Some(44); + /// }); + /// }); + /// + /// s.spawn(|_| { + /// value_c = Some(66); + /// }); + /// }); + /// assert_eq!(value_a, Some(22)); + /// assert_eq!(value_b, Some(44)); + /// assert_eq!(value_c, Some(66)); + /// ``` + /// + /// # See also + /// + /// The [`scope` function] has more extensive documentation about + /// task spawning. + /// + /// [`scope` function]: fn.scope.html + pub fn spawn(&self, body: BODY) + where + BODY: FnOnce(&Scope<'scope>) + Send + 'scope, + { + let scope_ptr = ScopePtr(self); + let job = HeapJob::new(self.base.tlv, move || unsafe { + // SAFETY: this job will execute before the scope ends. + let scope = scope_ptr.as_ref(); + ScopeBase::execute_job(&scope.base, move || body(scope)) + }); + let job_ref = self.base.heap_job_ref(job); + + // Since `Scope` implements `Sync`, we can't be sure that we're still in a + // thread of this pool, so we can't just push to the local worker thread. + // Also, this might be an in-place scope. + self.base.registry.inject_or_push(job_ref); + } + + /// Spawns a job into every thread of the fork-join scope `self`. This job will + /// execute on each thread sometime before the fork-join scope completes. The + /// job is specified as a closure, and this closure receives its own reference + /// to the scope `self` as argument, as well as a `BroadcastContext`. + pub fn spawn_broadcast(&self, body: BODY) + where + BODY: Fn(&Scope<'scope>, BroadcastContext<'_>) + Send + Sync + 'scope, + { + let scope_ptr = ScopePtr(self); + let job = ArcJob::new(move || unsafe { + // SAFETY: this job will execute before the scope ends. + let scope = scope_ptr.as_ref(); + let body = &body; + let func = move || BroadcastContext::with(move |ctx| body(scope, ctx)); + ScopeBase::execute_job(&scope.base, func) + }); + self.base.inject_broadcast(job) + } +} + +impl<'scope> ScopeFifo<'scope> { + fn new(owner: Option<&WorkerThread>, registry: Option<&Arc>) -> Self { + let base = ScopeBase::new(owner, registry); + let num_threads = base.registry.num_threads(); + let fifos = (0..num_threads).map(|_| JobFifo::new()).collect(); + ScopeFifo { base, fifos } + } + + /// Spawns a job into the fork-join scope `self`. This job will + /// execute sometime before the fork-join scope completes. The + /// job is specified as a closure, and this closure receives its + /// own reference to the scope `self` as argument. This can be + /// used to inject new jobs into `self`. + /// + /// # See also + /// + /// This method is akin to [`Scope::spawn()`], but with a FIFO + /// priority. The [`scope_fifo` function] has more details about + /// this distinction. + /// + /// [`Scope::spawn()`]: struct.Scope.html#method.spawn + /// [`scope_fifo` function]: fn.scope_fifo.html + pub fn spawn_fifo(&self, body: BODY) + where + BODY: FnOnce(&ScopeFifo<'scope>) + Send + 'scope, + { + let scope_ptr = ScopePtr(self); + let job = HeapJob::new(self.base.tlv, move || unsafe { + // SAFETY: this job will execute before the scope ends. 
+ let scope = scope_ptr.as_ref(); + ScopeBase::execute_job(&scope.base, move || body(scope)) + }); + let job_ref = self.base.heap_job_ref(job); + + // If we're in the pool, use our scope's private fifo for this thread to execute + // in a locally-FIFO order. Otherwise, just use the pool's global injector. + match self.base.registry.current_thread() { + Some(worker) => { + let fifo = &self.fifos[worker.index()]; + // SAFETY: this job will execute before the scope ends. + unsafe { worker.push(fifo.push(job_ref)) }; + } + None => self.base.registry.inject(job_ref), + } + } + + /// Spawns a job into every thread of the fork-join scope `self`. This job will + /// execute on each thread sometime before the fork-join scope completes. The + /// job is specified as a closure, and this closure receives its own reference + /// to the scope `self` as argument, as well as a `BroadcastContext`. + pub fn spawn_broadcast(&self, body: BODY) + where + BODY: Fn(&ScopeFifo<'scope>, BroadcastContext<'_>) + Send + Sync + 'scope, + { + let scope_ptr = ScopePtr(self); + let job = ArcJob::new(move || unsafe { + // SAFETY: this job will execute before the scope ends. + let scope = scope_ptr.as_ref(); + let body = &body; + let func = move || BroadcastContext::with(move |ctx| body(scope, ctx)); + ScopeBase::execute_job(&scope.base, func) + }); + self.base.inject_broadcast(job) + } +} + +impl<'scope> ScopeBase<'scope> { + /// Creates the base of a new scope for the given registry + fn new(owner: Option<&WorkerThread>, registry: Option<&Arc>) -> Self { + let registry = registry.unwrap_or_else(|| match owner { + Some(owner) => owner.registry(), + None => global_registry(), + }); + + ScopeBase { + registry: Arc::clone(registry), + panic: AtomicPtr::new(ptr::null_mut()), + job_completed_latch: CountLatch::new(owner), + marker: PhantomData, + tlv: tlv::get(), + } + } + + fn heap_job_ref(&self, job: Box>) -> JobRef + where + FUNC: FnOnce() + Send + 'scope, + { + unsafe { + self.job_completed_latch.increment(); + job.into_job_ref() + } + } + + fn inject_broadcast(&self, job: Arc>) + where + FUNC: Fn() + Send + Sync + 'scope, + { + let n_threads = self.registry.num_threads(); + let job_refs = (0..n_threads).map(|_| unsafe { + self.job_completed_latch.increment(); + ArcJob::as_job_ref(&job) + }); + + self.registry.inject_broadcast(job_refs); + } + + /// Executes `func` as a job, either aborting or executing as + /// appropriate. + fn complete(&self, owner: Option<&WorkerThread>, func: FUNC) -> R + where + FUNC: FnOnce() -> R, + { + let result = unsafe { Self::execute_job_closure(self, func) }; + self.job_completed_latch.wait(owner); + + // Restore the TLV if we ran some jobs while waiting + tlv::set(self.tlv); + + self.maybe_propagate_panic(); + result.unwrap() // only None if `op` panicked, and that would have been propagated + } + + /// Executes `func` as a job, either aborting or executing as + /// appropriate. + unsafe fn execute_job(this: *const Self, func: FUNC) + where + FUNC: FnOnce(), + { + let _: Option<()> = unsafe { Self::execute_job_closure(this, func) }; + } + + /// Executes `func` as a job in scope. Adjusts the "job completed" + /// counters and also catches any panic and stores it into + /// `scope`. 
+ unsafe fn execute_job_closure(this: *const Self, func: FUNC) -> Option + where + FUNC: FnOnce() -> R, + { + let result = match unwind::halt_unwinding(func) { + Ok(r) => Some(r), + Err(err) => { + unsafe { (*this).job_panicked(err) }; + None + } + }; + unsafe { Latch::set(&(*this).job_completed_latch) }; + result + } + + fn job_panicked(&self, err: Box) { + // capture the first error we see, free the rest + if self.panic.load(Ordering::Relaxed).is_null() { + let nil = ptr::null_mut(); + let mut err = ManuallyDrop::new(Box::new(err)); // box up the fat ptr + let err_ptr: *mut Box = &mut **err; + if self + .panic + .compare_exchange(nil, err_ptr, Ordering::Release, Ordering::Relaxed) + .is_ok() + { + // ownership now transferred into self.panic + } else { + // another panic raced in ahead of us, so drop ours + let _: Box> = ManuallyDrop::into_inner(err); + } + } + } + + fn maybe_propagate_panic(&self) { + // propagate panic, if any occurred; at this point, all + // outstanding jobs have completed, so we can use a relaxed + // ordering: + let panic = self.panic.swap(ptr::null_mut(), Ordering::Relaxed); + if !panic.is_null() { + let value = unsafe { Box::from_raw(panic) }; + + // Restore the TLV if we ran some jobs while waiting + tlv::set(self.tlv); + + unwind::resume_unwinding(*value); + } + } +} + +impl<'scope> fmt::Debug for Scope<'scope> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("Scope") + .field("pool_id", &self.base.registry.id()) + .field("panic", &self.base.panic) + .field("job_completed_latch", &self.base.job_completed_latch) + .finish() + } +} + +impl<'scope> fmt::Debug for ScopeFifo<'scope> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("ScopeFifo") + .field("num_fifos", &self.fifos.len()) + .field("pool_id", &self.base.registry.id()) + .field("panic", &self.base.panic) + .field("job_completed_latch", &self.base.job_completed_latch) + .finish() + } +} + +/// Used to capture a scope `&Self` pointer in jobs, without faking a lifetime. +/// +/// Unsafe code is still required to dereference the pointer, but that's fine in +/// scope jobs that are guaranteed to execute before the scope ends. 
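`job_panicked` above keeps only the first panic payload: every racing job attempts a `compare_exchange` from null, and the losers drop their own boxed payload instead of leaking or overwriting the winner. A stripped-down sketch of that first-writer-wins idiom, using boxed strings in place of real panic payloads:

```rust
use std::ptr;
use std::sync::atomic::{AtomicPtr, Ordering};
use std::thread;

static FIRST_ERROR: AtomicPtr<String> = AtomicPtr::new(ptr::null_mut());

fn record_error(msg: String) {
    let new = Box::into_raw(Box::new(msg));
    // Only the first caller installs its payload; everyone else frees theirs.
    if FIRST_ERROR
        .compare_exchange(ptr::null_mut(), new, Ordering::Release, Ordering::Relaxed)
        .is_err()
    {
        // SAFETY: `new` came from Box::into_raw above and was not installed.
        drop(unsafe { Box::from_raw(new) });
    }
}

fn main() {
    let threads: Vec<_> = (0..4)
        .map(|i| thread::spawn(move || record_error(format!("job {i} failed"))))
        .collect();
    for t in threads {
        t.join().unwrap();
    }

    let winner = FIRST_ERROR.swap(ptr::null_mut(), Ordering::Acquire);
    assert!(!winner.is_null());
    // SAFETY: the winning pointer came from Box::into_raw and is taken exactly once.
    println!("first recorded error: {}", unsafe { Box::from_raw(winner) });
}
```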
+struct ScopePtr(*const T); + +// SAFETY: !Send for raw pointers is not for safety, just as a lint +unsafe impl Send for ScopePtr {} + +// SAFETY: !Sync for raw pointers is not for safety, just as a lint +unsafe impl Sync for ScopePtr {} + +impl ScopePtr { + // Helper to avoid disjoint captures of `scope_ptr.0` + unsafe fn as_ref(&self) -> &T { + unsafe { &*self.0 } + } +} diff --git a/compiler/rustc_thread_pool/src/scope/tests.rs b/compiler/rustc_thread_pool/src/scope/tests.rs new file mode 100644 index 0000000000000..2df3bc67e298b --- /dev/null +++ b/compiler/rustc_thread_pool/src/scope/tests.rs @@ -0,0 +1,607 @@ +use std::iter::once; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Barrier, Mutex}; +use std::vec; + +use rand::{Rng, SeedableRng}; +use rand_xorshift::XorShiftRng; + +use crate::{Scope, ScopeFifo, ThreadPoolBuilder, scope, scope_fifo, unwind}; + +#[test] +fn scope_empty() { + scope(|_| {}); +} + +#[test] +fn scope_result() { + let x = scope(|_| 22); + assert_eq!(x, 22); +} + +#[test] +fn scope_two() { + let counter = &AtomicUsize::new(0); + scope(|s| { + s.spawn(move |_| { + counter.fetch_add(1, Ordering::SeqCst); + }); + s.spawn(move |_| { + counter.fetch_add(10, Ordering::SeqCst); + }); + }); + + let v = counter.load(Ordering::SeqCst); + assert_eq!(v, 11); +} + +#[test] +fn scope_divide_and_conquer() { + let counter_p = &AtomicUsize::new(0); + scope(|s| s.spawn(move |s| divide_and_conquer(s, counter_p, 1024))); + + let counter_s = &AtomicUsize::new(0); + divide_and_conquer_seq(counter_s, 1024); + + let p = counter_p.load(Ordering::SeqCst); + let s = counter_s.load(Ordering::SeqCst); + assert_eq!(p, s); +} + +fn divide_and_conquer<'scope>(scope: &Scope<'scope>, counter: &'scope AtomicUsize, size: usize) { + if size > 1 { + scope.spawn(move |scope| divide_and_conquer(scope, counter, size / 2)); + scope.spawn(move |scope| divide_and_conquer(scope, counter, size / 2)); + } else { + // count the leaves + counter.fetch_add(1, Ordering::SeqCst); + } +} + +fn divide_and_conquer_seq(counter: &AtomicUsize, size: usize) { + if size > 1 { + divide_and_conquer_seq(counter, size / 2); + divide_and_conquer_seq(counter, size / 2); + } else { + // count the leaves + counter.fetch_add(1, Ordering::SeqCst); + } +} + +struct Tree { + value: T, + children: Vec>, +} + +impl Tree { + fn iter(&self) -> vec::IntoIter<&T> { + once(&self.value) + .chain(self.children.iter().flat_map(Tree::iter)) + .collect::>() // seems like it shouldn't be needed... 
but prevents overflow + .into_iter() + } + + fn update(&mut self, op: OP) + where + OP: Fn(&mut T) + Sync, + T: Send, + { + scope(|s| self.update_in_scope(&op, s)); + } + + fn update_in_scope<'scope, OP>(&'scope mut self, op: &'scope OP, scope: &Scope<'scope>) + where + OP: Fn(&mut T) + Sync, + { + let Tree { ref mut value, ref mut children } = *self; + scope.spawn(move |scope| { + for child in children { + scope.spawn(move |scope| child.update_in_scope(op, scope)); + } + }); + + op(value); + } +} + +fn random_tree(depth: usize) -> Tree { + assert!(depth > 0); + let mut seed = ::Seed::default(); + (0..).zip(seed.as_mut()).for_each(|(i, x)| *x = i); + let mut rng = XorShiftRng::from_seed(seed); + random_tree1(depth, &mut rng) +} + +fn random_tree1(depth: usize, rng: &mut XorShiftRng) -> Tree { + let children = if depth == 0 { + vec![] + } else { + (0..rng.random_range(0..4)) // somewhere between 0 and 3 children at each level + .map(|_| random_tree1(depth - 1, rng)) + .collect() + }; + + Tree { value: rng.random_range(0..1_000_000), children } +} + +#[test] +fn update_tree() { + let mut tree: Tree = random_tree(10); + let values: Vec = tree.iter().cloned().collect(); + tree.update(|v| *v += 1); + let new_values: Vec = tree.iter().cloned().collect(); + assert_eq!(values.len(), new_values.len()); + for (&i, &j) in values.iter().zip(&new_values) { + assert_eq!(i + 1, j); + } +} + +/// Check that if you have a chain of scoped tasks where T0 spawns T1 +/// spawns T2 and so forth down to Tn, the stack space should not grow +/// linearly with N. We test this by some unsafe hackery and +/// permitting an approx 10% change with a 10x input change. +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn linear_stack_growth() { + let builder = ThreadPoolBuilder::new().num_threads(1); + let pool = builder.build().unwrap(); + pool.install(|| { + let mut max_diff = Mutex::new(0); + let bottom_of_stack = 0; + scope(|s| the_final_countdown(s, &bottom_of_stack, &max_diff, 5)); + let diff_when_5 = *max_diff.get_mut().unwrap() as f64; + + scope(|s| the_final_countdown(s, &bottom_of_stack, &max_diff, 500)); + let diff_when_500 = *max_diff.get_mut().unwrap() as f64; + + let ratio = diff_when_5 / diff_when_500; + assert!(ratio > 0.9 && ratio < 1.1, "stack usage ratio out of bounds: {}", ratio); + }); +} + +fn the_final_countdown<'scope>( + s: &Scope<'scope>, + bottom_of_stack: &'scope i32, + max: &'scope Mutex, + n: usize, +) { + let top_of_stack = 0; + let p = bottom_of_stack as *const i32 as usize; + let q = &top_of_stack as *const i32 as usize; + let diff = if p > q { p - q } else { q - p }; + + let mut data = max.lock().unwrap(); + *data = Ord::max(diff, *data); + + if n > 0 { + s.spawn(move |s| the_final_countdown(s, bottom_of_stack, max, n - 1)); + } +} + +#[test] +#[should_panic(expected = "Hello, world!")] +fn panic_propagate_scope() { + scope(|_| panic!("Hello, world!")); +} + +#[test] +#[should_panic(expected = "Hello, world!")] +fn panic_propagate_spawn() { + scope(|s| s.spawn(|_| panic!("Hello, world!"))); +} + +#[test] +#[should_panic(expected = "Hello, world!")] +fn panic_propagate_nested_spawn() { + scope(|s| s.spawn(|s| s.spawn(|s| s.spawn(|_| panic!("Hello, world!"))))); +} + +#[test] +#[should_panic(expected = "Hello, world!")] +fn panic_propagate_nested_scope_spawn() { + scope(|s| s.spawn(|_| scope(|s| s.spawn(|_| panic!("Hello, world!"))))); +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn panic_propagate_still_execute_1() { + let mut x = false; + 
let result = unwind::halt_unwinding(|| { + scope(|s| { + s.spawn(|_| panic!("Hello, world!")); // job A + s.spawn(|_| x = true); // job B, should still execute even though A panics + }); + }); + match result { + Ok(_) => panic!("failed to propagate panic"), + Err(_) => assert!(x, "job b failed to execute"), + } +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn panic_propagate_still_execute_2() { + let mut x = false; + let result = unwind::halt_unwinding(|| { + scope(|s| { + s.spawn(|_| x = true); // job B, should still execute even though A panics + s.spawn(|_| panic!("Hello, world!")); // job A + }); + }); + match result { + Ok(_) => panic!("failed to propagate panic"), + Err(_) => assert!(x, "job b failed to execute"), + } +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn panic_propagate_still_execute_3() { + let mut x = false; + let result = unwind::halt_unwinding(|| { + scope(|s| { + s.spawn(|_| x = true); // spawned job should still execute despite later panic + panic!("Hello, world!"); + }); + }); + match result { + Ok(_) => panic!("failed to propagate panic"), + Err(_) => assert!(x, "panic after spawn, spawn failed to execute"), + } +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn panic_propagate_still_execute_4() { + let mut x = false; + let result = unwind::halt_unwinding(|| { + scope(|s| { + s.spawn(|_| panic!("Hello, world!")); + x = true; + }); + }); + match result { + Ok(_) => panic!("failed to propagate panic"), + Err(_) => assert!(x, "panic in spawn tainted scope"), + } +} + +macro_rules! test_order { + ($scope:ident => $spawn:ident) => {{ + let builder = ThreadPoolBuilder::new().num_threads(1); + let pool = builder.build().unwrap(); + pool.install(|| { + let vec = Mutex::new(vec![]); + $scope(|scope| { + let vec = &vec; + for i in 0..10 { + scope.$spawn(move |scope| { + for j in 0..10 { + scope.$spawn(move |_| { + vec.lock().unwrap().push(i * 10 + j); + }); + } + }); + } + }); + vec.into_inner().unwrap() + }) + }}; +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn lifo_order() { + // In the absence of stealing, `scope()` runs its `spawn()` jobs in LIFO order. + let vec = test_order!(scope => spawn); + let expected: Vec = (0..100).rev().collect(); // LIFO -> reversed + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn fifo_order() { + // In the absence of stealing, `scope_fifo()` runs its `spawn_fifo()` jobs in FIFO order. + let vec = test_order!(scope_fifo => spawn_fifo); + let expected: Vec = (0..100).collect(); // FIFO -> natural order + assert_eq!(vec, expected); +} + +macro_rules! test_nested_order { + ($outer_scope:ident => $outer_spawn:ident, + $inner_scope:ident => $inner_spawn:ident) => {{ + let builder = ThreadPoolBuilder::new().num_threads(1); + let pool = builder.build().unwrap(); + pool.install(|| { + let vec = Mutex::new(vec![]); + $outer_scope(|scope| { + let vec = &vec; + for i in 0..10 { + scope.$outer_spawn(move |_| { + $inner_scope(|scope| { + for j in 0..10 { + scope.$inner_spawn(move |_| { + vec.lock().unwrap().push(i * 10 + j); + }); + } + }); + }); + } + }); + vec.into_inner().unwrap() + }) + }}; +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn nested_lifo_order() { + // In the absence of stealing, `scope()` runs its `spawn()` jobs in LIFO order. 
+ let vec = test_nested_order!(scope => spawn, scope => spawn); + let expected: Vec = (0..100).rev().collect(); // LIFO -> reversed + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn nested_fifo_order() { + // In the absence of stealing, `scope_fifo()` runs its `spawn_fifo()` jobs in FIFO order. + let vec = test_nested_order!(scope_fifo => spawn_fifo, scope_fifo => spawn_fifo); + let expected: Vec = (0..100).collect(); // FIFO -> natural order + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn nested_lifo_fifo_order() { + // LIFO on the outside, FIFO on the inside + let vec = test_nested_order!(scope => spawn, scope_fifo => spawn_fifo); + let expected: Vec = (0..10).rev().flat_map(|i| (0..10).map(move |j| i * 10 + j)).collect(); + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn nested_fifo_lifo_order() { + // FIFO on the outside, LIFO on the inside + let vec = test_nested_order!(scope_fifo => spawn_fifo, scope => spawn); + let expected: Vec = (0..10).flat_map(|i| (0..10).rev().map(move |j| i * 10 + j)).collect(); + assert_eq!(vec, expected); +} + +macro_rules! spawn_push { + ($scope:ident . $spawn:ident, $vec:ident, $i:expr) => {{ + $scope.$spawn(move |_| $vec.lock().unwrap().push($i)); + }}; +} + +/// Test spawns pushing a series of numbers, interleaved +/// such that negative values are using an inner scope. +macro_rules! test_mixed_order { + ($outer_scope:ident => $outer_spawn:ident, + $inner_scope:ident => $inner_spawn:ident) => {{ + let builder = ThreadPoolBuilder::new().num_threads(1); + let pool = builder.build().unwrap(); + pool.install(|| { + let vec = Mutex::new(vec![]); + $outer_scope(|outer_scope| { + let vec = &vec; + spawn_push!(outer_scope.$outer_spawn, vec, 0); + $inner_scope(|inner_scope| { + spawn_push!(inner_scope.$inner_spawn, vec, -1); + spawn_push!(outer_scope.$outer_spawn, vec, 1); + spawn_push!(inner_scope.$inner_spawn, vec, -2); + spawn_push!(outer_scope.$outer_spawn, vec, 2); + spawn_push!(inner_scope.$inner_spawn, vec, -3); + }); + spawn_push!(outer_scope.$outer_spawn, vec, 3); + }); + vec.into_inner().unwrap() + }) + }}; +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn mixed_lifo_order() { + // NB: the end of the inner scope makes us execute some of the outer scope + // before they've all been spawned, so they're not perfectly LIFO. + let vec = test_mixed_order!(scope => spawn, scope => spawn); + let expected = vec![-3, 2, -2, 1, -1, 3, 0]; + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn mixed_fifo_order() { + let vec = test_mixed_order!(scope_fifo => spawn_fifo, scope_fifo => spawn_fifo); + let expected = vec![-1, 0, -2, 1, -3, 2, 3]; + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn mixed_lifo_fifo_order() { + // NB: the end of the inner scope makes us execute some of the outer scope + // before they've all been spawned, so they're not perfectly LIFO. 
+ let vec = test_mixed_order!(scope => spawn, scope_fifo => spawn_fifo); + let expected = vec![-1, 2, -2, 1, -3, 3, 0]; + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn mixed_fifo_lifo_order() { + let vec = test_mixed_order!(scope_fifo => spawn_fifo, scope => spawn); + let expected = vec![-3, 0, -2, 1, -1, 2, 3]; + assert_eq!(vec, expected); +} + +#[test] +fn static_scope() { + static COUNTER: AtomicUsize = AtomicUsize::new(0); + + let mut range = 0..100; + let sum = range.clone().sum(); + let iter = &mut range; + + COUNTER.store(0, Ordering::Relaxed); + scope(|s: &Scope<'static>| { + // While we're allowed the locally borrowed iterator, + // the spawns must be static. + for i in iter { + s.spawn(move |_| { + COUNTER.fetch_add(i, Ordering::Relaxed); + }); + } + }); + + assert_eq!(COUNTER.load(Ordering::Relaxed), sum); +} + +#[test] +fn static_scope_fifo() { + static COUNTER: AtomicUsize = AtomicUsize::new(0); + + let mut range = 0..100; + let sum = range.clone().sum(); + let iter = &mut range; + + COUNTER.store(0, Ordering::Relaxed); + scope_fifo(|s: &ScopeFifo<'static>| { + // While we're allowed the locally borrowed iterator, + // the spawns must be static. + for i in iter { + s.spawn_fifo(move |_| { + COUNTER.fetch_add(i, Ordering::Relaxed); + }); + } + }); + + assert_eq!(COUNTER.load(Ordering::Relaxed), sum); +} + +#[test] +fn mixed_lifetime_scope() { + fn increment<'slice, 'counter>(counters: &'slice [&'counter AtomicUsize]) { + scope(move |s: &Scope<'counter>| { + // We can borrow 'slice here, but the spawns can only borrow 'counter. + for &c in counters { + s.spawn(move |_| { + c.fetch_add(1, Ordering::Relaxed); + }); + } + }); + } + + let counter = AtomicUsize::new(0); + increment(&[&counter; 100]); + assert_eq!(counter.into_inner(), 100); +} + +#[test] +fn mixed_lifetime_scope_fifo() { + fn increment<'slice, 'counter>(counters: &'slice [&'counter AtomicUsize]) { + scope_fifo(move |s: &ScopeFifo<'counter>| { + // We can borrow 'slice here, but the spawns can only borrow 'counter. 
+ for &c in counters { + s.spawn_fifo(move |_| { + c.fetch_add(1, Ordering::Relaxed); + }); + } + }); + } + + let counter = AtomicUsize::new(0); + increment(&[&counter; 100]); + assert_eq!(counter.into_inner(), 100); +} + +#[test] +fn scope_spawn_broadcast() { + let sum = AtomicUsize::new(0); + let n = scope(|s| { + s.spawn_broadcast(|_, ctx| { + sum.fetch_add(ctx.index(), Ordering::Relaxed); + }); + crate::current_num_threads() + }); + assert_eq!(sum.into_inner(), n * (n - 1) / 2); +} + +#[test] +fn scope_fifo_spawn_broadcast() { + let sum = AtomicUsize::new(0); + let n = scope_fifo(|s| { + s.spawn_broadcast(|_, ctx| { + sum.fetch_add(ctx.index(), Ordering::Relaxed); + }); + crate::current_num_threads() + }); + assert_eq!(sum.into_inner(), n * (n - 1) / 2); +} + +#[test] +fn scope_spawn_broadcast_nested() { + let sum = AtomicUsize::new(0); + let n = scope(|s| { + s.spawn_broadcast(|s, _| { + s.spawn_broadcast(|_, ctx| { + sum.fetch_add(ctx.index(), Ordering::Relaxed); + }); + }); + crate::current_num_threads() + }); + assert_eq!(sum.into_inner(), n * n * (n - 1) / 2); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn scope_spawn_broadcast_barrier() { + let barrier = Barrier::new(8); + let pool = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + pool.in_place_scope(|s| { + s.spawn_broadcast(|_, _| { + barrier.wait(); + }); + barrier.wait(); + }); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn scope_spawn_broadcast_panic_one() { + let count = AtomicUsize::new(0); + let pool = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + let result = crate::unwind::halt_unwinding(|| { + pool.scope(|s| { + s.spawn_broadcast(|_, ctx| { + count.fetch_add(1, Ordering::Relaxed); + if ctx.index() == 3 { + panic!("Hello, world!"); + } + }); + }); + }); + assert_eq!(count.into_inner(), 7); + assert!(result.is_err(), "broadcast panic should propagate!"); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn scope_spawn_broadcast_panic_many() { + let count = AtomicUsize::new(0); + let pool = ThreadPoolBuilder::new().num_threads(7).build().unwrap(); + let result = crate::unwind::halt_unwinding(|| { + pool.scope(|s| { + s.spawn_broadcast(|_, ctx| { + count.fetch_add(1, Ordering::Relaxed); + if ctx.index() % 2 == 0 { + panic!("Hello, world!"); + } + }); + }); + }); + assert_eq!(count.into_inner(), 7); + assert!(result.is_err(), "broadcast panic should propagate!"); +} diff --git a/compiler/rustc_thread_pool/src/sleep/README.md b/compiler/rustc_thread_pool/src/sleep/README.md new file mode 100644 index 0000000000000..1e11da55f4a4e --- /dev/null +++ b/compiler/rustc_thread_pool/src/sleep/README.md @@ -0,0 +1,252 @@ +# Introduction: the sleep module + +The code in this module governs when worker threads should go to +sleep. The system used in this code was introduced in [Rayon RFC #5]. +There is also a [video walkthrough] available. Both of those may be +valuable resources to understanding the code, though naturally they +will also grow stale over time. The comments in this file are +extracted from the RFC and meant to be kept up to date. + +[Rayon RFC #5]: https://github.com/rayon-rs/rfcs/pull/5 +[video walkthrough]: https://youtu.be/HvmQsE5M4cY + +# The `Sleep` struct + +The `Sleep` struct is embedded into each registry. It performs several functions: + +* It tracks when workers are awake or asleep. 
+* It decides how long a worker should look for work before it goes to sleep, + via a callback that is invoked periodically from the worker's search loop. +* It is notified when latches are set, jobs are published, or other + events occur, and it will go and wake the appropriate threads if + they are sleeping. + +# Thread states + +There are three main thread states: + +* An **active** thread is one that is actively executing a job. +* An **idle** thread is one that is searching for work to do. It will be + trying to steal work or pop work from the global injector queue. +* A **sleeping** thread is one that is blocked on a condition variable, + waiting to be awoken. + +We sometimes refer to the final two states collectively as **inactive**. +Threads begin as idle but transition to idle and finally sleeping when +they're unable to find work to do. + +## Sleepy threads + +There is one other special state worth mentioning. During the idle state, +threads can get **sleepy**. A sleepy thread is still idle, in that it is still +searching for work, but it is *about* to go to sleep after it does one more +search (or some other number, potentially). When a thread enters the sleepy +state, it signals (via the **jobs event counter**, described below) that it is +about to go to sleep. If new work is published, this will lead to the counter +being adjusted. When the thread actually goes to sleep, it will (hopefully, but +not guaranteed) see that the counter has changed and elect not to sleep, but +instead to search again. See the section on the **jobs event counter** for more +details. + +# The counters + +One of the key structs in the sleep module is `AtomicCounters`, found in +`counters.rs`. It packs three counters into one atomically managed value: + +* Two **thread counters**, which track the number of threads in a particular state. +* The **jobs event counter**, which is used to signal when new work is available. + It (sort of) tracks the number of jobs posted, but not quite, and it can rollover. + +## Thread counters + +There are two thread counters, one that tracks **inactive** threads and one that +tracks **sleeping** threads. From this, one can deduce the number of threads +that are idle by subtracting sleeping threads from inactive threads. We track +the counters in this way because it permits simpler atomic operations. One can +increment the number of sleeping threads (and thus decrease the number of idle +threads) simply by doing one atomic increment, for example. Similarly, one can +decrease the number of sleeping threads (and increase the number of idle +threads) through one atomic decrement. + +These counters are adjusted as follows: + +* When a thread enters the idle state: increment the inactive thread counter. +* When a thread enters the sleeping state: increment the sleeping thread counter. +* When a thread awakens a sleeping thread: decrement the sleeping thread counter. + * Subtle point: the thread that *awakens* the sleeping thread decrements the + counter, not the thread that is *sleeping*. This is because there is a delay + between signaling a thread to wake and the thread actually waking: + decrementing the counter when awakening the thread means that other threads + that may be posting work will see the up-to-date value that much faster. +* When a thread finds work, exiting the idle state: decrement the inactive + thread counter. + +## Jobs event counter + +The final counter is the **jobs event counter**. 
The role of this counter is to +help sleepy threads detect when new work is posted in a lightweight fashion. In +its simplest form, we would simply have a counter that gets incremented each +time a new job is posted. This way, when a thread gets sleepy, it could read the +counter, and then compare to see if the value has changed before it actually +goes to sleep. But this [turns out to be too expensive] in practice, so we use a +somewhat more complex scheme. + +[turns out to be too expensive]: https://github.com/rayon-rs/rayon/pull/746#issuecomment-624802747 + +The idea is that the counter toggles between two states, depending on whether +its value is even or odd (or, equivalently, on the value of its low bit): + +* Even -- If the low bit is zero, then it means that there has been no new work + since the last thread got sleepy. +* Odd -- If the low bit is one, then it means that new work was posted since + the last thread got sleepy. + +### New work is posted + +When new work is posted, we check the value of the counter: if it is even, +then we increment it by one, so that it becomes odd. + +### Worker thread gets sleepy + +When a worker thread gets sleepy, it will read the value of the counter. If the +counter is odd, it will increment the counter so that it is even. Either way, it +remembers the final value of the counter. The final value will be used later, +when the thread is going to sleep. If at that time the counter has not changed, +then we can assume no new jobs have been posted (though note the remote +possibility of rollover, discussed in detail below). + +# Protocol for a worker thread to post work + +The full protocol for a thread to post work is as follows + +* If the work is posted into the injection queue, then execute a seq-cst fence (see below). +* Load the counters, incrementing the JEC if it is even so that it is odd. +* Check if there are idle threads available to handle this new job. If not, + and there are sleeping threads, then wake one or more threads. + +# Protocol for a worker thread to fall asleep + +The full protocol for a thread to fall asleep is as follows: + +* After completing all its jobs, the worker goes idle and begins to + search for work. As it searches, it counts "rounds". In each round, + it searches all other work threads' queues, plus the 'injector queue' for + work injected from the outside. If work is found in this search, the thread + becomes active again and hence restarts this protocol from the top. +* After a certain number of rounds, the thread "gets sleepy" and executes `get_sleepy` + above, remembering the `final_value` of the JEC. It does one more search for work. +* If no work is found, the thread atomically: + * Checks the JEC to see that it has not changed from `final_value`. + * If it has, then the thread goes back to searching for work. We reset to + just before we got sleepy, so that we will do one more search + before attempting to sleep again (rather than searching for many rounds). + * Increments the number of sleeping threads by 1. +* The thread then executes a seq-cst fence operation (see below). +* The thread then does one final check for injected jobs (see below). If any + are available, it returns to the 'pre-sleepy' state as if the JEC had changed. +* The thread waits to be signaled. Once signaled, it returns to the idle state. 
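
To make the even/odd toggle above concrete, here is a minimal, standalone sketch of just that part of the protocol. It is illustrative only: the `Jec` type and its method names are invented for this example, and the real implementation packs the JEC into the `AtomicCounters` word alongside the thread counts (see `counters.rs` later in this diff) rather than using a bare `AtomicUsize`.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

/// Illustrative stand-in for the jobs event counter: even = "sleepy"
/// (no new work since the last thread got sleepy), odd = "active"
/// (new work was posted since then).
struct Jec(AtomicUsize);

impl Jec {
    fn new() -> Self {
        Jec(AtomicUsize::new(0))
    }

    /// New work is posted: if the counter is even, bump it to odd so that
    /// sleepy threads can notice that something happened.
    fn on_new_work(&self) {
        let _ = self.0.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |v| {
            if v % 2 == 0 { Some(v.wrapping_add(1)) } else { None }
        });
    }

    /// A worker gets sleepy: if the counter is odd, bump it back to even.
    /// Either way, return the (even) value the worker should remember.
    fn on_get_sleepy(&self) -> usize {
        match self.0.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |v| {
            if v % 2 == 1 { Some(v.wrapping_add(1)) } else { None }
        }) {
            Ok(prev) => prev.wrapping_add(1), // we made it even; remember the new value
            Err(cur) => cur,                  // it was already even; remember it as is
        }
    }

    /// Final check before sleeping: if the counter moved since we got sleepy,
    /// new work may have been posted and we should search again instead.
    fn changed_since(&self, remembered: usize) -> bool {
        self.0.load(Ordering::SeqCst) != remembered
    }
}

fn main() {
    let jec = Jec::new();
    let remembered = jec.on_get_sleepy();   // a worker announces it is sleepy
    jec.on_new_work();                      // another thread posts a job
    assert!(jec.changed_since(remembered)); // the worker notices and keeps searching
}
```

In the actual counters word, the JEC occupies the most significant bits, so incrementing it cannot overflow into the thread counts; it simply wraps around, which is why the rollover case discussed in the next section has to be considered at all.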
+
+# The jobs event counter and deadlock
+
+As described in the section on the JEC, the main concern around going to sleep
+is avoiding a race condition wherein:
+
+* Thread A looks for work, finds none.
+* Thread B posts work but sees no sleeping threads.
+* Thread A goes to sleep.
+
+The JEC protocol largely prevents this, but due to rollover, this prevention is
+not complete. It is possible -- if unlikely -- that enough activity occurs for
+Thread A to observe the same JEC value that it saw when getting sleepy. If the
+new work being published came from *inside* the thread-pool, then this race
+condition isn't too harmful. It means that we have fewer workers processing the
+work than we should, but we won't deadlock. This seems like an acceptable risk
+given that this is unlikely in practice.
+
+However, if the work was posted as an *external* job, that is a problem. In that
+case, it's possible that all of our workers could go to sleep, and the external
+job would never get processed. To prevent that, the sleeping protocol includes
+one final check to see if the injector queue is empty before fully falling
+asleep. Note that this final check occurs **after** the number of sleeping
+threads has been incremented. We are therefore not concerned with races against
+injections that occur after that increment, only before.
+
+Unfortunately, there is one rather subtle point concerning this final check:
+we wish to avoid the possibility that:
+
+* work is pushed into the injection queue by an outside thread X,
+* the sleepy thread S reads the JEC, but it has rolled over so that it equals
+  the value S remembered from when it got sleepy, and
+* the sleepy thread S reads the injection queue but does not see the work
+  posted by X.
+
+This is possible because the C++ memory model typically offers guarantees of the
+form "if you see the access A, then you must see those other accesses" -- but it
+doesn't guarantee that you will see the access A (i.e., if you think of
+processors with independent caches, you may be operating on very out-of-date
+cache state).
+
+## Using seq-cst fences to prevent deadlock
+
+To overcome this problem, we have inserted two sequentially consistent fence
+operations into the protocols above:
+
+* One fence occurs after work is posted into the injection queue, but before the
+  counters are read (including the number of sleeping threads).
+  * Note that no fence is needed for work posted to internal queues, since it is
+    ok to overlook work in that case.
+* One fence occurs after the number of sleeping threads is incremented, but
+  before the injection queue is read.
+
+### Proof sketch
+
+What follows is a "proof sketch" that the protocol is deadlock free. We model
+two relevant bits of memory, the job injector queue J and the atomic counters C.
+
+Consider the actions of the injecting thread:
+
+* PushJob: Job is injected, which can be modeled as an atomic write to J with
+  release semantics.
+* PushFence: A sequentially consistent fence is executed.
+* ReadSleepers: The counters C are read (they may also be incremented, but we
+  just consider the read that comes first).
+
+Meanwhile, the sleepy thread does the following:
+
+* IncSleepers: The number of sleeping threads is incremented, which is an
+  atomic exchange on C.
+* SleepFence: A sequentially consistent fence is executed.
+* ReadJob: We look to see if the queue is empty, which is a read of J with
+  acquire semantics.
+
+Either PushFence or SleepFence must come first:
+
+* If PushFence comes first, then PushJob must be visible to ReadJob.
+* If SleepFence comes first, then IncSleepers is visible to ReadSleepers. + +# Deadlock detection + +This module tracks a number of variables in order to detect deadlocks due to user code blocking. +These variables are stored in the `SleepData` struct which itself is kept behind a mutex. +It contains the following fields: +- `worker_count` - The number of threads in the thread pool. +- `active_threads` - The number of threads in the thread pool which are running + and aren't blocked in user code or sleeping. +- `blocked_threads` - The number of threads which are blocked in user code. + This doesn't include threads blocked by Rayon. + +User code can indicate blocking by calling `mark_blocked` before blocking and +calling `mark_unblocked` before unblocking a thread. +This will adjust `active_threads` and `blocked_threads` accordingly. + +When we tickle the thread pool in `Sleep::tickle_cold`, we set `active_threads` to +`worker_count` - `blocked_threads` since we wake up all Rayon threads, but not thread blocked +by user code. + +A deadlock is detected by checking if `active_threads` is 0 and `blocked_threads` is above 0. +If we ignored `blocked_threads` we would have a deadlock +immediately when creating the thread pool. +We would also deadlock once the thread pool ran out of work. +It is not possible for Rayon itself to deadlock. +Deadlocks can only be caused by user code blocking, so this condition doesn't miss any deadlocks. + +We check for the deadlock condition when +threads fall asleep in `mark_unblocked` and in `Sleep::sleep`. +If there's a deadlock detected we call the user provided deadlock handler while we hold the +lock to `SleepData`. This means the deadlock handler cannot call `mark_blocked` and +`mark_unblocked`. The user is expected to handle the deadlock in some non-Rayon thread. +Once the deadlock handler returns, the thread which called the deadlock handler will go to sleep. diff --git a/compiler/rustc_thread_pool/src/sleep/counters.rs b/compiler/rustc_thread_pool/src/sleep/counters.rs new file mode 100644 index 0000000000000..f2682028b96a6 --- /dev/null +++ b/compiler/rustc_thread_pool/src/sleep/counters.rs @@ -0,0 +1,273 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; + +pub(super) struct AtomicCounters { + /// Packs together a number of counters. The counters are ordered as + /// follows, from least to most significant bits (here, we assuming + /// that [`THREADS_BITS`] is equal to 10): + /// + /// * Bits 0..10: Stores the number of **sleeping threads** + /// * Bits 10..20: Stores the number of **inactive threads** + /// * Bits 20..: Stores the **job event counter** (JEC) + /// + /// This uses 10 bits ([`THREADS_BITS`]) to encode the number of threads. Note + /// that the total number of bits (and hence the number of bits used for the + /// JEC) will depend on whether we are using a 32- or 64-bit architecture. + value: AtomicUsize, +} + +#[derive(Copy, Clone)] +pub(super) struct Counters { + word: usize, +} + +/// A value read from the **Jobs Event Counter**. +/// See the [`README.md`](README.md) for more +/// coverage of how the jobs event counter works. +#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)] +pub(super) struct JobsEventCounter(usize); + +impl JobsEventCounter { + pub(super) const DUMMY: JobsEventCounter = JobsEventCounter(usize::MAX); + + #[inline] + pub(super) fn as_usize(self) -> usize { + self.0 + } + + /// The JEC "is sleepy" if the last thread to increment it was in the + /// process of becoming sleepy. 
This is indicated by its value being *even*. + /// When new jobs are posted, they check if the JEC is sleepy, and if so + /// they incremented it. + #[inline] + pub(super) fn is_sleepy(self) -> bool { + (self.as_usize() & 1) == 0 + } + + /// The JEC "is active" if the last thread to increment it was posting new + /// work. This is indicated by its value being *odd*. When threads get + /// sleepy, they will check if the JEC is active, and increment it. + #[inline] + pub(super) fn is_active(self) -> bool { + !self.is_sleepy() + } +} + +/// Number of bits used for the thread counters. +#[cfg(target_pointer_width = "64")] +const THREADS_BITS: usize = 16; + +#[cfg(target_pointer_width = "32")] +const THREADS_BITS: usize = 8; + +/// Bits to shift to select the sleeping threads +/// (used with `select_bits`). +#[allow(clippy::erasing_op)] +const SLEEPING_SHIFT: usize = 0 * THREADS_BITS; + +/// Bits to shift to select the inactive threads +/// (used with `select_bits`). +#[allow(clippy::identity_op)] +const INACTIVE_SHIFT: usize = 1 * THREADS_BITS; + +/// Bits to shift to select the JEC +/// (use JOBS_BITS). +const JEC_SHIFT: usize = 2 * THREADS_BITS; + +/// Max value for the thread counters. +pub(crate) const THREADS_MAX: usize = (1 << THREADS_BITS) - 1; + +/// Constant that can be added to add one sleeping thread. +const ONE_SLEEPING: usize = 1; + +/// Constant that can be added to add one inactive thread. +/// An inactive thread is either idle, sleepy, or sleeping. +const ONE_INACTIVE: usize = 1 << INACTIVE_SHIFT; + +/// Constant that can be added to add one to the JEC. +const ONE_JEC: usize = 1 << JEC_SHIFT; + +impl AtomicCounters { + #[inline] + pub(super) fn new() -> AtomicCounters { + AtomicCounters { value: AtomicUsize::new(0) } + } + + /// Load and return the current value of the various counters. + /// This value can then be given to other method which will + /// attempt to update the counters via compare-and-swap. + #[inline] + pub(super) fn load(&self, ordering: Ordering) -> Counters { + Counters::new(self.value.load(ordering)) + } + + #[inline] + fn try_exchange(&self, old_value: Counters, new_value: Counters, ordering: Ordering) -> bool { + self.value + .compare_exchange(old_value.word, new_value.word, ordering, Ordering::Relaxed) + .is_ok() + } + + /// Adds an inactive thread. This cannot fail. + /// + /// This should be invoked when a thread enters its idle loop looking + /// for work. It is decremented when work is found. Note that it is + /// not decremented if the thread transitions from idle to sleepy or sleeping; + /// so the number of inactive threads is always greater-than-or-equal + /// to the number of sleeping threads. + #[inline] + pub(super) fn add_inactive_thread(&self) { + self.value.fetch_add(ONE_INACTIVE, Ordering::SeqCst); + } + + /// Increments the jobs event counter if `increment_when`, when applied to + /// the current value, is true. Used to toggle the JEC from even (sleepy) to + /// odd (active) or vice versa. Returns the final value of the counters, for + /// which `increment_when` is guaranteed to return false. 
+ pub(super) fn increment_jobs_event_counter_if( + &self, + increment_when: impl Fn(JobsEventCounter) -> bool, + ) -> Counters { + loop { + let old_value = self.load(Ordering::SeqCst); + if increment_when(old_value.jobs_counter()) { + let new_value = old_value.increment_jobs_counter(); + if self.try_exchange(old_value, new_value, Ordering::SeqCst) { + return new_value; + } + } else { + return old_value; + } + } + } + + /// Subtracts an inactive thread. This cannot fail. It is invoked + /// when a thread finds work and hence becomes active. It returns the + /// number of sleeping threads to wake up (if any). + /// + /// See `add_inactive_thread`. + #[inline] + pub(super) fn sub_inactive_thread(&self) -> usize { + let old_value = Counters::new(self.value.fetch_sub(ONE_INACTIVE, Ordering::SeqCst)); + debug_assert!( + old_value.inactive_threads() > 0, + "sub_inactive_thread: old_value {:?} has no inactive threads", + old_value, + ); + debug_assert!( + old_value.sleeping_threads() <= old_value.inactive_threads(), + "sub_inactive_thread: old_value {:?} had {} sleeping threads and {} inactive threads", + old_value, + old_value.sleeping_threads(), + old_value.inactive_threads(), + ); + + // Current heuristic: whenever an inactive thread goes away, if + // there are any sleeping threads, wake 'em up. + let sleeping_threads = old_value.sleeping_threads(); + Ord::min(sleeping_threads, 2) + } + + /// Subtracts a sleeping thread. This cannot fail, but it is only + /// safe to do if you you know the number of sleeping threads is + /// non-zero (i.e., because you have just awoken a sleeping + /// thread). + #[inline] + pub(super) fn sub_sleeping_thread(&self) { + let old_value = Counters::new(self.value.fetch_sub(ONE_SLEEPING, Ordering::SeqCst)); + debug_assert!( + old_value.sleeping_threads() > 0, + "sub_sleeping_thread: old_value {:?} had no sleeping threads", + old_value, + ); + debug_assert!( + old_value.sleeping_threads() <= old_value.inactive_threads(), + "sub_sleeping_thread: old_value {:?} had {} sleeping threads and {} inactive threads", + old_value, + old_value.sleeping_threads(), + old_value.inactive_threads(), + ); + } + + #[inline] + pub(super) fn try_add_sleeping_thread(&self, old_value: Counters) -> bool { + debug_assert!( + old_value.inactive_threads() > 0, + "try_add_sleeping_thread: old_value {:?} has no inactive threads", + old_value, + ); + debug_assert!( + old_value.sleeping_threads() < THREADS_MAX, + "try_add_sleeping_thread: old_value {:?} has too many sleeping threads", + old_value, + ); + + let mut new_value = old_value; + new_value.word += ONE_SLEEPING; + + self.try_exchange(old_value, new_value, Ordering::SeqCst) + } +} + +#[inline] +fn select_thread(word: usize, shift: usize) -> usize { + (word >> shift) & THREADS_MAX +} + +#[inline] +fn select_jec(word: usize) -> usize { + word >> JEC_SHIFT +} + +impl Counters { + #[inline] + fn new(word: usize) -> Counters { + Counters { word } + } + + #[inline] + fn increment_jobs_counter(self) -> Counters { + // We can freely add to JEC because it occupies the most significant bits. + // Thus it doesn't overflow into the other counters, just wraps itself. + Counters { word: self.word.wrapping_add(ONE_JEC) } + } + + #[inline] + pub(super) fn jobs_counter(self) -> JobsEventCounter { + JobsEventCounter(select_jec(self.word)) + } + + /// The number of threads that are not actively + /// executing work. They may be idle, sleepy, or asleep. 
+ #[inline] + pub(super) fn inactive_threads(self) -> usize { + select_thread(self.word, INACTIVE_SHIFT) + } + + #[inline] + pub(super) fn awake_but_idle_threads(self) -> usize { + debug_assert!( + self.sleeping_threads() <= self.inactive_threads(), + "sleeping threads: {} > raw idle threads {}", + self.sleeping_threads(), + self.inactive_threads() + ); + self.inactive_threads() - self.sleeping_threads() + } + + #[inline] + pub(super) fn sleeping_threads(self) -> usize { + select_thread(self.word, SLEEPING_SHIFT) + } +} + +impl std::fmt::Debug for Counters { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let word = format!("{:016x}", self.word); + fmt.debug_struct("Counters") + .field("word", &word) + .field("jobs", &self.jobs_counter().0) + .field("inactive", &self.inactive_threads()) + .field("sleeping", &self.sleeping_threads()) + .finish() + } +} diff --git a/compiler/rustc_thread_pool/src/sleep/mod.rs b/compiler/rustc_thread_pool/src/sleep/mod.rs new file mode 100644 index 0000000000000..a9cdf68cc7ebf --- /dev/null +++ b/compiler/rustc_thread_pool/src/sleep/mod.rs @@ -0,0 +1,386 @@ +//! Code that decides when workers should go to sleep. See README.md +//! for an overview. + +use std::sync::atomic::Ordering; +use std::sync::{Condvar, Mutex}; +use std::thread; + +use crossbeam_utils::CachePadded; + +use crate::DeadlockHandler; +use crate::latch::CoreLatch; +use crate::registry::WorkerThread; + +mod counters; +pub(crate) use self::counters::THREADS_MAX; +use self::counters::{AtomicCounters, JobsEventCounter}; + +struct SleepData { + /// The number of threads in the thread pool. + worker_count: usize, + + /// The number of threads in the thread pool which are running and + /// aren't blocked in user code or sleeping. + active_threads: usize, + + /// The number of threads which are blocked in user code. + /// This doesn't include threads blocked by this module. + blocked_threads: usize, +} + +impl SleepData { + /// Checks if the conditions for a deadlock holds and if so calls the deadlock handler + #[inline] + pub(super) fn deadlock_check(&self, deadlock_handler: &Option>) { + if self.active_threads == 0 && self.blocked_threads > 0 { + (deadlock_handler.as_ref().unwrap())(); + } + } +} + +/// The `Sleep` struct is embedded into each registry. It governs the waking and sleeping +/// of workers. It has callbacks that are invoked periodically at significant events, +/// such as when workers are looping and looking for work, when latches are set, or when +/// jobs are published, and it either blocks threads or wakes them in response to these +/// events. See the [`README.md`] in this module for more details. +/// +/// [`README.md`] README.md +pub(super) struct Sleep { + /// One "sleep state" per worker. Used to track if a worker is sleeping and to have + /// them block. + worker_sleep_states: Vec>, + + counters: AtomicCounters, + + data: Mutex, +} + +/// An instance of this struct is created when a thread becomes idle. +/// It is consumed when the thread finds work, and passed by `&mut` +/// reference for operations that preserve the idle state. (In other +/// words, producing one of these structs is evidence the thread is +/// idle.) It tracks state such as how long the thread has been idle. +pub(super) struct IdleState { + /// What is worker index of the idle thread? + worker_index: usize, + + /// How many rounds have we been circling without sleeping? + rounds: u32, + + /// Once we become sleepy, what was the sleepy counter value? 
+ /// Set to `INVALID_SLEEPY_COUNTER` otherwise. + jobs_counter: JobsEventCounter, +} + +/// The "sleep state" for an individual worker. +#[derive(Default)] +struct WorkerSleepState { + /// Set to true when the worker goes to sleep; set to false when + /// the worker is notified or when it wakes. + is_blocked: Mutex, + + condvar: Condvar, +} + +const ROUNDS_UNTIL_SLEEPY: u32 = 32; +const ROUNDS_UNTIL_SLEEPING: u32 = ROUNDS_UNTIL_SLEEPY + 1; + +impl Sleep { + pub(super) fn new(n_threads: usize) -> Sleep { + assert!(n_threads <= THREADS_MAX); + Sleep { + worker_sleep_states: (0..n_threads).map(|_| Default::default()).collect(), + counters: AtomicCounters::new(), + data: Mutex::new(SleepData { + worker_count: n_threads, + active_threads: n_threads, + blocked_threads: 0, + }), + } + } + + /// Mark a Rayon worker thread as blocked. This triggers the deadlock handler + /// if no other worker thread is active + #[inline] + pub(super) fn mark_blocked(&self, deadlock_handler: &Option>) { + let mut data = self.data.lock().unwrap(); + debug_assert!(data.active_threads > 0); + debug_assert!(data.blocked_threads < data.worker_count); + debug_assert!(data.active_threads > 0); + data.active_threads -= 1; + data.blocked_threads += 1; + + data.deadlock_check(deadlock_handler); + } + + /// Mark a previously blocked Rayon worker thread as unblocked + #[inline] + pub(super) fn mark_unblocked(&self) { + let mut data = self.data.lock().unwrap(); + debug_assert!(data.active_threads < data.worker_count); + debug_assert!(data.blocked_threads > 0); + data.active_threads += 1; + data.blocked_threads -= 1; + } + + #[inline] + pub(super) fn start_looking(&self, worker_index: usize) -> IdleState { + self.counters.add_inactive_thread(); + + IdleState { worker_index, rounds: 0, jobs_counter: JobsEventCounter::DUMMY } + } + + #[inline] + pub(super) fn work_found(&self) { + // If we were the last idle thread and other threads are still sleeping, + // then we should wake up another thread. + let threads_to_wake = self.counters.sub_inactive_thread(); + self.wake_any_threads(threads_to_wake as u32); + } + + #[inline] + pub(super) fn no_work_found( + &self, + idle_state: &mut IdleState, + latch: &CoreLatch, + thread: &WorkerThread, + ) { + if idle_state.rounds < ROUNDS_UNTIL_SLEEPY { + thread::yield_now(); + idle_state.rounds += 1; + } else if idle_state.rounds == ROUNDS_UNTIL_SLEEPY { + idle_state.jobs_counter = self.announce_sleepy(); + idle_state.rounds += 1; + thread::yield_now(); + } else if idle_state.rounds < ROUNDS_UNTIL_SLEEPING { + idle_state.rounds += 1; + thread::yield_now(); + } else { + debug_assert_eq!(idle_state.rounds, ROUNDS_UNTIL_SLEEPING); + self.sleep(idle_state, latch, thread); + } + } + + #[cold] + fn announce_sleepy(&self) -> JobsEventCounter { + self.counters.increment_jobs_event_counter_if(JobsEventCounter::is_active).jobs_counter() + } + + #[cold] + fn sleep(&self, idle_state: &mut IdleState, latch: &CoreLatch, thread: &WorkerThread) { + let worker_index = idle_state.worker_index; + + if !latch.get_sleepy() { + return; + } + + let sleep_state = &self.worker_sleep_states[worker_index]; + let mut is_blocked = sleep_state.is_blocked.lock().unwrap(); + debug_assert!(!*is_blocked); + + // Our latch was signalled. We should wake back up fully as we + // will have some stuff to do. + if !latch.fall_asleep() { + idle_state.wake_fully(); + return; + } + + loop { + let counters = self.counters.load(Ordering::SeqCst); + + // Check if the JEC has changed since we got sleepy. 
+ debug_assert!(idle_state.jobs_counter.is_sleepy()); + if counters.jobs_counter() != idle_state.jobs_counter { + // JEC has changed, so a new job was posted, but for some reason + // we didn't see it. We should return to just before the SLEEPY + // state so we can do another search and (if we fail to find + // work) go back to sleep. + idle_state.wake_partly(); + latch.wake_up(); + return; + } + + // Otherwise, let's move from IDLE to SLEEPING. + if self.counters.try_add_sleeping_thread(counters) { + break; + } + } + + // Successfully registered as asleep. + + // We have one last check for injected jobs to do. This protects against + // deadlock in the very unlikely event that + // + // - an external job is being injected while we are sleepy + // - that job triggers the rollover over the JEC such that we don't see it + // - we are the last active worker thread + std::sync::atomic::fence(Ordering::SeqCst); + if thread.has_injected_job() { + // If we see an externally injected job, then we have to 'wake + // ourselves up'. (Ordinarily, `sub_sleeping_thread` is invoked by + // the one that wakes us.) + self.counters.sub_sleeping_thread(); + } else { + { + // Decrement the number of active threads and check for a deadlock + let mut data = self.data.lock().unwrap(); + data.active_threads -= 1; + data.deadlock_check(&thread.registry.deadlock_handler); + } + + // If we don't see an injected job (the normal case), then flag + // ourselves as asleep and wait till we are notified. + // + // (Note that `is_blocked` is held under a mutex and the mutex was + // acquired *before* we incremented the "sleepy counter". This means + // that whomever is coming to wake us will have to wait until we + // release the mutex in the call to `wait`, so they will see this + // boolean as true.) + thread.registry.release_thread(); + *is_blocked = true; + while *is_blocked { + is_blocked = sleep_state.condvar.wait(is_blocked).unwrap(); + } + + // Drop `is_blocked` now in case `acquire_thread` blocks + drop(is_blocked); + + thread.registry.acquire_thread(); + } + + // Update other state: + idle_state.wake_fully(); + latch.wake_up(); + } + + /// Notify the given thread that it should wake up (if it is + /// sleeping). When this method is invoked, we typically know the + /// thread is asleep, though in rare cases it could have been + /// awoken by (e.g.) new work having been posted. + pub(super) fn notify_worker_latch_is_set(&self, target_worker_index: usize) { + self.wake_specific_thread(target_worker_index); + } + + /// Signals that `num_jobs` new jobs were injected into the thread + /// pool from outside. This function will ensure that there are + /// threads available to process them, waking threads from sleep + /// if necessary. + /// + /// # Parameters + /// + /// - `num_jobs` -- lower bound on number of jobs available for stealing. + /// We'll try to get at least one thread per job. + #[inline] + pub(super) fn new_injected_jobs(&self, num_jobs: u32, queue_was_empty: bool) { + // This fence is needed to guarantee that threads + // as they are about to fall asleep, observe any + // new jobs that may have been injected. + std::sync::atomic::fence(Ordering::SeqCst); + + self.new_jobs(num_jobs, queue_was_empty) + } + + /// Signals that `num_jobs` new jobs were pushed onto a thread's + /// local deque. This function will try to ensure that there are + /// threads available to process them, waking threads from sleep + /// if necessary. 
However, this is not guaranteed: under certain + /// race conditions, the function may fail to wake any new + /// threads; in that case the existing thread should eventually + /// pop the job. + /// + /// # Parameters + /// + /// - `num_jobs` -- lower bound on number of jobs available for stealing. + /// We'll try to get at least one thread per job. + #[inline] + pub(super) fn new_internal_jobs(&self, num_jobs: u32, queue_was_empty: bool) { + self.new_jobs(num_jobs, queue_was_empty) + } + + /// Common helper for `new_injected_jobs` and `new_internal_jobs`. + #[inline] + fn new_jobs(&self, num_jobs: u32, queue_was_empty: bool) { + // Read the counters and -- if sleepy workers have announced themselves + // -- announce that there is now work available. The final value of `counters` + // with which we exit the loop thus corresponds to a state when + let counters = self.counters.increment_jobs_event_counter_if(JobsEventCounter::is_sleepy); + let num_awake_but_idle = counters.awake_but_idle_threads(); + let num_sleepers = counters.sleeping_threads(); + + if num_sleepers == 0 { + // nobody to wake + return; + } + + // Promote from u16 to u32 so we can interoperate with + // num_jobs more easily. + let num_awake_but_idle = num_awake_but_idle as u32; + let num_sleepers = num_sleepers as u32; + + // If the queue is non-empty, then we always wake up a worker + // -- clearly the existing idle jobs aren't enough. Otherwise, + // check to see if we have enough idle workers. + if !queue_was_empty { + let num_to_wake = Ord::min(num_jobs, num_sleepers); + self.wake_any_threads(num_to_wake); + } else if num_awake_but_idle < num_jobs { + let num_to_wake = Ord::min(num_jobs - num_awake_but_idle, num_sleepers); + self.wake_any_threads(num_to_wake); + } + } + + #[cold] + fn wake_any_threads(&self, mut num_to_wake: u32) { + if num_to_wake > 0 { + for i in 0..self.worker_sleep_states.len() { + if self.wake_specific_thread(i) { + num_to_wake -= 1; + if num_to_wake == 0 { + return; + } + } + } + } + } + + fn wake_specific_thread(&self, index: usize) -> bool { + let sleep_state = &self.worker_sleep_states[index]; + + let mut is_blocked = sleep_state.is_blocked.lock().unwrap(); + if *is_blocked { + *is_blocked = false; + + // Increment the number of active threads + self.data.lock().unwrap().active_threads += 1; + + sleep_state.condvar.notify_one(); + + // When the thread went to sleep, it will have incremented + // this value. When we wake it, its our job to decrement + // it. We could have the thread do it, but that would + // introduce a delay between when the thread was + // *notified* and when this counter was decremented. That + // might mislead people with new work into thinking that + // there are sleeping threads that they should try to + // wake, when in fact there is nothing left for them to + // do. 
+ self.counters.sub_sleeping_thread(); + + true + } else { + false + } + } +} + +impl IdleState { + fn wake_fully(&mut self) { + self.rounds = 0; + self.jobs_counter = JobsEventCounter::DUMMY; + } + + fn wake_partly(&mut self) { + self.rounds = ROUNDS_UNTIL_SLEEPY; + self.jobs_counter = JobsEventCounter::DUMMY; + } +} diff --git a/compiler/rustc_thread_pool/src/spawn/mod.rs b/compiler/rustc_thread_pool/src/spawn/mod.rs new file mode 100644 index 0000000000000..040a02bfa6769 --- /dev/null +++ b/compiler/rustc_thread_pool/src/spawn/mod.rs @@ -0,0 +1,165 @@ +use std::mem; +use std::sync::Arc; + +use crate::job::*; +use crate::registry::Registry; +use crate::tlv::Tlv; +use crate::unwind; + +/// Puts the task into the Rayon threadpool's job queue in the "static" +/// or "global" scope. Just like a standard thread, this task is not +/// tied to the current stack frame, and hence it cannot hold any +/// references other than those with `'static` lifetime. If you want +/// to spawn a task that references stack data, use [the `scope()` +/// function][scope] to create a scope. +/// +/// [scope]: fn.scope.html +/// +/// Since tasks spawned with this function cannot hold references into +/// the enclosing stack frame, you almost certainly want to use a +/// `move` closure as their argument (otherwise, the closure will +/// typically hold references to any variables from the enclosing +/// function that you happen to use). +/// +/// This API assumes that the closure is executed purely for its +/// side-effects (i.e., it might send messages, modify data protected +/// by a mutex, or some such thing). +/// +/// There is no guaranteed order of execution for spawns, given that +/// other threads may steal tasks at any time. However, they are +/// generally prioritized in a LIFO order on the thread from which +/// they were spawned. Other threads always steal from the other end of +/// the deque, like FIFO order. The idea is that "recent" tasks are +/// most likely to be fresh in the local CPU's cache, while other +/// threads can steal older "stale" tasks. For an alternate approach, +/// consider [`spawn_fifo()`] instead. +/// +/// [`spawn_fifo()`]: fn.spawn_fifo.html +/// +/// # Panic handling +/// +/// If this closure should panic, the resulting panic will be +/// propagated to the panic handler registered in the `ThreadPoolBuilder`, +/// if any. See [`ThreadPoolBuilder::panic_handler()`][ph] for more +/// details. +/// +/// [ph]: struct.ThreadPoolBuilder.html#method.panic_handler +/// +/// # Examples +/// +/// This code creates a Rayon task that increments a global counter. +/// +/// ```rust +/// # use rustc_thread_pool as rayon; +/// use std::sync::atomic::{AtomicUsize, Ordering, ATOMIC_USIZE_INIT}; +/// +/// static GLOBAL_COUNTER: AtomicUsize = ATOMIC_USIZE_INIT; +/// +/// rayon::spawn(move || { +/// GLOBAL_COUNTER.fetch_add(1, Ordering::SeqCst); +/// }); +/// ``` +pub fn spawn(func: F) +where + F: FnOnce() + Send + 'static, +{ + // We assert that current registry has not terminated. + unsafe { spawn_in(func, &Registry::current()) } +} + +/// Spawns an asynchronous job in `registry.` +/// +/// Unsafe because `registry` must not yet have terminated. 
+pub(super) unsafe fn spawn_in(func: F, registry: &Arc) +where + F: FnOnce() + Send + 'static, +{ + // We assert that this does not hold any references (we know + // this because of the `'static` bound in the interface); + // moreover, we assert that the code below is not supposed to + // be able to panic, and hence the data won't leak but will be + // enqueued into some deque for later execution. + let abort_guard = unwind::AbortIfPanic; // just in case we are wrong, and code CAN panic + let job_ref = unsafe { spawn_job(func, registry) }; + registry.inject_or_push(job_ref); + mem::forget(abort_guard); +} + +unsafe fn spawn_job(func: F, registry: &Arc) -> JobRef +where + F: FnOnce() + Send + 'static, +{ + // Ensure that registry cannot terminate until this job has + // executed. This ref is decremented at the (*) below. + registry.increment_terminate_count(); + + HeapJob::new(Tlv::null(), { + let registry = Arc::clone(registry); + move || { + registry.catch_unwind(func); + registry.terminate(); // (*) permit registry to terminate now + } + }) + .into_static_job_ref() +} + +/// Fires off a task into the Rayon threadpool in the "static" or +/// "global" scope. Just like a standard thread, this task is not +/// tied to the current stack frame, and hence it cannot hold any +/// references other than those with `'static` lifetime. If you want +/// to spawn a task that references stack data, use [the `scope_fifo()` +/// function](fn.scope_fifo.html) to create a scope. +/// +/// The behavior is essentially the same as [the `spawn` +/// function](fn.spawn.html), except that calls from the same thread +/// will be prioritized in FIFO order. This is similar to the now- +/// deprecated [`breadth_first`] option, except the effect is isolated +/// to relative `spawn_fifo` calls, not all threadpool tasks. +/// +/// For more details on this design, see Rayon [RFC #1]. +/// +/// [`breadth_first`]: struct.ThreadPoolBuilder.html#method.breadth_first +/// [RFC #1]: https://github.com/rayon-rs/rfcs/blob/master/accepted/rfc0001-scope-scheduling.md +/// +/// # Panic handling +/// +/// If this closure should panic, the resulting panic will be +/// propagated to the panic handler registered in the `ThreadPoolBuilder`, +/// if any. See [`ThreadPoolBuilder::panic_handler()`][ph] for more +/// details. +/// +/// [ph]: struct.ThreadPoolBuilder.html#method.panic_handler +pub fn spawn_fifo(func: F) +where + F: FnOnce() + Send + 'static, +{ + // We assert that current registry has not terminated. + unsafe { spawn_fifo_in(func, &Registry::current()) } +} + +/// Spawns an asynchronous FIFO job in `registry.` +/// +/// Unsafe because `registry` must not yet have terminated. +pub(super) unsafe fn spawn_fifo_in(func: F, registry: &Arc) +where + F: FnOnce() + Send + 'static, +{ + // We assert that this does not hold any references (we know + // this because of the `'static` bound in the interface); + // moreover, we assert that the code below is not supposed to + // be able to panic, and hence the data won't leak but will be + // enqueued into some deque for later execution. + let abort_guard = unwind::AbortIfPanic; // just in case we are wrong, and code CAN panic + let job_ref = unsafe { spawn_job(func, registry) }; + + // If we're in the pool, use our thread's private fifo for this thread to execute + // in a locally-FIFO order. Otherwise, just use the pool's global injector. 
+ match registry.current_thread() { + Some(worker) => unsafe { worker.push_fifo(job_ref) }, + None => registry.inject(job_ref), + } + mem::forget(abort_guard); +} + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_thread_pool/src/spawn/tests.rs b/compiler/rustc_thread_pool/src/spawn/tests.rs new file mode 100644 index 0000000000000..8a70d2faf9c6c --- /dev/null +++ b/compiler/rustc_thread_pool/src/spawn/tests.rs @@ -0,0 +1,246 @@ +use std::any::Any; +use std::sync::Mutex; +use std::sync::mpsc::channel; + +use super::{spawn, spawn_fifo}; +use crate::{ThreadPoolBuilder, scope}; + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn spawn_then_join_in_worker() { + let (tx, rx) = channel(); + scope(move |_| { + spawn(move || tx.send(22).unwrap()); + }); + assert_eq!(22, rx.recv().unwrap()); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn spawn_then_join_outside_worker() { + let (tx, rx) = channel(); + spawn(move || tx.send(22).unwrap()); + assert_eq!(22, rx.recv().unwrap()); +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn panic_fwd() { + let (tx, rx) = channel(); + + let tx = Mutex::new(tx); + let panic_handler = move |err: Box| { + let tx = tx.lock().unwrap(); + if let Some(&msg) = err.downcast_ref::<&str>() { + if msg == "Hello, world!" { + tx.send(1).unwrap(); + } else { + tx.send(2).unwrap(); + } + } else { + tx.send(3).unwrap(); + } + }; + + let builder = ThreadPoolBuilder::new().panic_handler(panic_handler); + + builder.build().unwrap().spawn(move || panic!("Hello, world!")); + + assert_eq!(1, rx.recv().unwrap()); +} + +/// Test what happens when the thread-pool is dropped but there are +/// still active asynchronous tasks. We expect the thread-pool to stay +/// alive and executing until those threads are complete. +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn termination_while_things_are_executing() { + let (tx0, rx0) = channel(); + let (tx1, rx1) = channel(); + + // Create a thread-pool and spawn some code in it, but then drop + // our reference to it. + { + let thread_pool = ThreadPoolBuilder::new().build().unwrap(); + thread_pool.spawn(move || { + let data = rx0.recv().unwrap(); + + // At this point, we know the "main" reference to the + // `ThreadPool` has been dropped, but there are still + // active threads. Launch one more. + spawn(move || { + tx1.send(data).unwrap(); + }); + }); + } + + tx0.send(22).unwrap(); + let v = rx1.recv().unwrap(); + assert_eq!(v, 22); +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn custom_panic_handler_and_spawn() { + let (tx, rx) = channel(); + + // Create a parallel closure that will send panics on the + // channel; since the closure is potentially executed in parallel + // with itself, we have to wrap `tx` in a mutex. + let tx = Mutex::new(tx); + let panic_handler = move |e: Box| { + tx.lock().unwrap().send(e).unwrap(); + }; + + // Execute an async that will panic. + let builder = ThreadPoolBuilder::new().panic_handler(panic_handler); + builder.build().unwrap().spawn(move || { + panic!("Hello, world!"); + }); + + // Check that we got back the panic we expected. 
+ let error = rx.recv().unwrap(); + if let Some(&msg) = error.downcast_ref::<&str>() { + assert_eq!(msg, "Hello, world!"); + } else { + panic!("did not receive a string from panic handler"); + } +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn custom_panic_handler_and_nested_spawn() { + let (tx, rx) = channel(); + + // Create a parallel closure that will send panics on the + // channel; since the closure is potentially executed in parallel + // with itself, we have to wrap `tx` in a mutex. + let tx = Mutex::new(tx); + let panic_handler = move |e| { + tx.lock().unwrap().send(e).unwrap(); + }; + + // Execute an async that will (eventually) panic. + const PANICS: usize = 3; + let builder = ThreadPoolBuilder::new().panic_handler(panic_handler); + builder.build().unwrap().spawn(move || { + // launch 3 nested spawn-asyncs; these should be in the same + // thread-pool and hence inherit the same panic handler + for _ in 0..PANICS { + spawn(move || { + panic!("Hello, world!"); + }); + } + }); + + // Check that we get back the panics we expected. + for _ in 0..PANICS { + let error = rx.recv().unwrap(); + if let Some(&msg) = error.downcast_ref::<&str>() { + assert_eq!(msg, "Hello, world!"); + } else { + panic!("did not receive a string from panic handler"); + } + } +} + +macro_rules! test_order { + ($outer_spawn:ident, $inner_spawn:ident) => {{ + let builder = ThreadPoolBuilder::new().num_threads(1); + let pool = builder.build().unwrap(); + let (tx, rx) = channel(); + pool.install(move || { + for i in 0..10 { + let tx = tx.clone(); + $outer_spawn(move || { + for j in 0..10 { + let tx = tx.clone(); + $inner_spawn(move || { + tx.send(i * 10 + j).unwrap(); + }); + } + }); + } + }); + rx.iter().collect::>() + }}; +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn lifo_order() { + // In the absence of stealing, `spawn()` jobs on a thread will run in LIFO order. + let vec = test_order!(spawn, spawn); + let expected: Vec = (0..100).rev().collect(); // LIFO -> reversed + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn fifo_order() { + // In the absence of stealing, `spawn_fifo()` jobs on a thread will run in FIFO order. + let vec = test_order!(spawn_fifo, spawn_fifo); + let expected: Vec = (0..100).collect(); // FIFO -> natural order + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn lifo_fifo_order() { + // LIFO on the outside, FIFO on the inside + let vec = test_order!(spawn, spawn_fifo); + let expected: Vec = (0..10).rev().flat_map(|i| (0..10).map(move |j| i * 10 + j)).collect(); + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn fifo_lifo_order() { + // FIFO on the outside, LIFO on the inside + let vec = test_order!(spawn_fifo, spawn); + let expected: Vec = (0..10).flat_map(|i| (0..10).rev().map(move |j| i * 10 + j)).collect(); + assert_eq!(vec, expected); +} + +macro_rules! spawn_send { + ($spawn:ident, $tx:ident, $i:expr) => {{ + let tx = $tx.clone(); + $spawn(move || tx.send($i).unwrap()); + }}; +} + +/// Test mixed spawns pushing a series of numbers, interleaved such +/// such that negative values are using the second kind of spawn. +macro_rules! 
test_mixed_order { + ($pos_spawn:ident, $neg_spawn:ident) => {{ + let builder = ThreadPoolBuilder::new().num_threads(1); + let pool = builder.build().unwrap(); + let (tx, rx) = channel(); + pool.install(move || { + spawn_send!($pos_spawn, tx, 0); + spawn_send!($neg_spawn, tx, -1); + spawn_send!($pos_spawn, tx, 1); + spawn_send!($neg_spawn, tx, -2); + spawn_send!($pos_spawn, tx, 2); + spawn_send!($neg_spawn, tx, -3); + spawn_send!($pos_spawn, tx, 3); + }); + rx.iter().collect::>() + }}; +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn mixed_lifo_fifo_order() { + let vec = test_mixed_order!(spawn, spawn_fifo); + let expected = vec![3, -1, 2, -2, 1, -3, 0]; + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn mixed_fifo_lifo_order() { + let vec = test_mixed_order!(spawn_fifo, spawn); + let expected = vec![0, -3, 1, -2, 2, -1, 3]; + assert_eq!(vec, expected); +} diff --git a/compiler/rustc_thread_pool/src/tests.rs b/compiler/rustc_thread_pool/src/tests.rs new file mode 100644 index 0000000000000..3082f11a167ac --- /dev/null +++ b/compiler/rustc_thread_pool/src/tests.rs @@ -0,0 +1,197 @@ +#![cfg(test)] + +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Barrier}; + +use crate::{ThreadPoolBuildError, ThreadPoolBuilder}; + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn worker_thread_index() { + let pool = ThreadPoolBuilder::new().num_threads(22).build().unwrap(); + assert_eq!(pool.current_num_threads(), 22); + assert_eq!(pool.current_thread_index(), None); + let index = pool.install(|| pool.current_thread_index().unwrap()); + assert!(index < 22); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn start_callback_called() { + let n_threads = 16; + let n_called = Arc::new(AtomicUsize::new(0)); + // Wait for all the threads in the pool plus the one running tests. + let barrier = Arc::new(Barrier::new(n_threads + 1)); + + let b = Arc::clone(&barrier); + let nc = Arc::clone(&n_called); + let start_handler = move |_| { + nc.fetch_add(1, Ordering::SeqCst); + b.wait(); + }; + + let conf = ThreadPoolBuilder::new().num_threads(n_threads).start_handler(start_handler); + let _ = conf.build().unwrap(); + + // Wait for all the threads to have been scheduled to run. + barrier.wait(); + + // The handler must have been called on every started thread. + assert_eq!(n_called.load(Ordering::SeqCst), n_threads); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn exit_callback_called() { + let n_threads = 16; + let n_called = Arc::new(AtomicUsize::new(0)); + // Wait for all the threads in the pool plus the one running tests. + let barrier = Arc::new(Barrier::new(n_threads + 1)); + + let b = Arc::clone(&barrier); + let nc = Arc::clone(&n_called); + let exit_handler = move |_| { + nc.fetch_add(1, Ordering::SeqCst); + b.wait(); + }; + + let conf = ThreadPoolBuilder::new().num_threads(n_threads).exit_handler(exit_handler); + { + let _ = conf.build().unwrap(); + // Drop the pool so it stops the running threads. + } + + // Wait for all the threads to have been scheduled to run. + barrier.wait(); + + // The handler must have been called on every exiting thread. 
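Outside of tests, the `start_handler`/`exit_handler` hooks verified above are typically used for per-worker bookkeeping; a rough sketch (the counters and the polling loop are only for illustration):

```rust
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};

use rustc_thread_pool::ThreadPoolBuilder;

fn main() {
    let started = Arc::new(AtomicUsize::new(0));
    let exited = Arc::new(AtomicUsize::new(0));

    let (s, e) = (Arc::clone(&started), Arc::clone(&exited));
    let pool = ThreadPoolBuilder::new()
        .num_threads(4)
        .start_handler(move |_index| {
            s.fetch_add(1, Ordering::SeqCst);
        })
        .exit_handler(move |_index| {
            e.fetch_add(1, Ordering::SeqCst);
        })
        .build()
        .unwrap();

    // Ask the workers to terminate; they run the exit handler on the way out.
    drop(pool);

    // The handlers run asynchronously on the worker threads, so poll for them
    // (the tests above use barriers for the same reason).
    while exited.load(Ordering::SeqCst) < 4 {
        std::thread::yield_now();
    }
    assert_eq!(started.load(Ordering::SeqCst), 4);
}
```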
+ assert_eq!(n_called.load(Ordering::SeqCst), n_threads); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn handler_panics_handled_correctly() { + let n_threads = 16; + let n_called = Arc::new(AtomicUsize::new(0)); + // Wait for all the threads in the pool plus the one running tests. + let start_barrier = Arc::new(Barrier::new(n_threads + 1)); + let exit_barrier = Arc::new(Barrier::new(n_threads + 1)); + + let start_handler = move |_| { + panic!("ensure panic handler is called when starting"); + }; + let exit_handler = move |_| { + panic!("ensure panic handler is called when exiting"); + }; + + let sb = Arc::clone(&start_barrier); + let eb = Arc::clone(&exit_barrier); + let nc = Arc::clone(&n_called); + let panic_handler = move |_| { + let val = nc.fetch_add(1, Ordering::SeqCst); + if val < n_threads { + sb.wait(); + } else { + eb.wait(); + } + }; + + let conf = ThreadPoolBuilder::new() + .num_threads(n_threads) + .start_handler(start_handler) + .exit_handler(exit_handler) + .panic_handler(panic_handler); + { + let _ = conf.build().unwrap(); + + // Wait for all the threads to start, panic in the start handler, + // and been taken care of by the panic handler. + start_barrier.wait(); + + // Drop the pool so it stops the running threads. + } + + // Wait for all the threads to exit, panic in the exit handler, + // and been taken care of by the panic handler. + exit_barrier.wait(); + + // The panic handler must have been called twice on every thread. + assert_eq!(n_called.load(Ordering::SeqCst), 2 * n_threads); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn check_config_build() { + let pool = ThreadPoolBuilder::new().num_threads(22).build().unwrap(); + assert_eq!(pool.current_num_threads(), 22); +} + +/// Helper used by check_error_send_sync to ensure ThreadPoolBuildError is Send + Sync +fn _send_sync() {} + +#[test] +fn check_error_send_sync() { + _send_sync::(); +} + +#[allow(deprecated)] +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn configuration() { + let start_handler = move |_| {}; + let exit_handler = move |_| {}; + let panic_handler = move |_| {}; + let thread_name = move |i| format!("thread_name_{}", i); + + // Ensure we can call all public methods on Configuration + crate::Configuration::new() + .thread_name(thread_name) + .num_threads(5) + .panic_handler(panic_handler) + .stack_size(4e6 as usize) + .breadth_first() + .start_handler(start_handler) + .exit_handler(exit_handler) + .build() + .unwrap(); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn default_pool() { + ThreadPoolBuilder::default().build().unwrap(); +} + +/// Test that custom spawned threads get their `WorkerThread` cleared once +/// the pool is done with them, allowing them to be used with rayon again +/// later. e.g. WebAssembly want to have their own pool of available threads. +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn cleared_current_thread() -> Result<(), ThreadPoolBuildError> { + let n_threads = 5; + let mut handles = vec![]; + let pool = ThreadPoolBuilder::new() + .num_threads(n_threads) + .spawn_handler(|thread| { + let handle = std::thread::spawn(move || { + thread.run(); + + // Afterward, the current thread shouldn't be set anymore. 
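The `cleared_current_thread` test above drives the pool through a custom `spawn_handler`; the same pattern, condensed to just the thread handling, would look roughly like:

```rust
use rustc_thread_pool::ThreadPoolBuilder;

fn main() {
    // Run the pool's workers on plain `std::thread`s that we own, so they can
    // be joined explicitly once the pool shuts down.
    let mut handles = Vec::new();
    let pool = ThreadPoolBuilder::new()
        .num_threads(2)
        .spawn_handler(|thread| {
            handles.push(std::thread::spawn(move || thread.run()));
            Ok(())
        })
        .build()
        .unwrap();

    pool.install(|| assert!(rustc_thread_pool::current_thread_index().is_some()));

    drop(pool); // request termination
    for handle in handles {
        handle.join().unwrap();
    }
}
```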
+ assert_eq!(crate::current_thread_index(), None); + }); + handles.push(handle); + Ok(()) + }) + .build()?; + assert_eq!(handles.len(), n_threads); + + pool.install(|| assert!(crate::current_thread_index().is_some())); + drop(pool); + + // Wait for all threads to make their assertions and exit + for handle in handles { + handle.join().unwrap(); + } + + Ok(()) +} diff --git a/compiler/rustc_thread_pool/src/thread_pool/mod.rs b/compiler/rustc_thread_pool/src/thread_pool/mod.rs new file mode 100644 index 0000000000000..3294e2a77cbe6 --- /dev/null +++ b/compiler/rustc_thread_pool/src/thread_pool/mod.rs @@ -0,0 +1,513 @@ +//! Contains support for user-managed thread pools, represented by the +//! the [`ThreadPool`] type (see that struct for details). +//! +//! [`ThreadPool`]: struct.ThreadPool.html + +use std::error::Error; +use std::fmt; +use std::sync::Arc; + +use crate::broadcast::{self, BroadcastContext}; +use crate::registry::{Registry, ThreadSpawn, WorkerThread}; +use crate::scope::{do_in_place_scope, do_in_place_scope_fifo}; +use crate::{ + Scope, ScopeFifo, ThreadPoolBuildError, ThreadPoolBuilder, join, scope, scope_fifo, spawn, +}; + +mod tests; + +/// Represents a user created [thread-pool]. +/// +/// Use a [`ThreadPoolBuilder`] to specify the number and/or names of threads +/// in the pool. After calling [`ThreadPoolBuilder::build()`], you can then +/// execute functions explicitly within this [`ThreadPool`] using +/// [`ThreadPool::install()`]. By contrast, top level rayon functions +/// (like `join()`) will execute implicitly within the current thread-pool. +/// +/// +/// ## Creating a ThreadPool +/// +/// ```rust +/// # use rustc_thread_pool as rayon; +/// let pool = rayon::ThreadPoolBuilder::new().num_threads(8).build().unwrap(); +/// ``` +/// +/// [`install()`][`ThreadPool::install()`] executes a closure in one of the `ThreadPool`'s +/// threads. In addition, any other rayon operations called inside of `install()` will also +/// execute in the context of the `ThreadPool`. +/// +/// When the `ThreadPool` is dropped, that's a signal for the threads it manages to terminate, +/// they will complete executing any remaining work that you have spawned, and automatically +/// terminate. +/// +/// +/// [thread-pool]: https://en.wikipedia.org/wiki/Thread_pool +/// [`ThreadPool`]: struct.ThreadPool.html +/// [`ThreadPool::new()`]: struct.ThreadPool.html#method.new +/// [`ThreadPoolBuilder`]: struct.ThreadPoolBuilder.html +/// [`ThreadPoolBuilder::build()`]: struct.ThreadPoolBuilder.html#method.build +/// [`ThreadPool::install()`]: struct.ThreadPool.html#method.install +pub struct ThreadPool { + registry: Arc, +} + +impl ThreadPool { + #[deprecated(note = "Use `ThreadPoolBuilder::build`")] + #[allow(deprecated)] + /// Deprecated in favor of `ThreadPoolBuilder::build`. + pub fn new(configuration: crate::Configuration) -> Result> { + Self::build(configuration.into_builder()).map_err(Box::from) + } + + pub(super) fn build( + builder: ThreadPoolBuilder, + ) -> Result + where + S: ThreadSpawn, + { + let registry = Registry::new(builder)?; + Ok(ThreadPool { registry }) + } + + /// Executes `op` within the threadpool. Any attempts to use + /// `join`, `scope`, or parallel iterators will then operate + /// within that threadpool. + /// + /// # Warning: thread-local data + /// + /// Because `op` is executing within the Rayon thread-pool, + /// thread-local data from the current thread will not be + /// accessible. 
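A small illustration of the thread-local caveat above; `COUNTER` is a made-up local, used only to show that the worker observes its own copy rather than the caller's:

```rust
use std::cell::Cell;

use rustc_thread_pool::ThreadPoolBuilder;

thread_local! {
    static COUNTER: Cell<u32> = Cell::new(0);
}

fn main() {
    COUNTER.with(|c| c.set(7));

    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
    // The closure runs on a pool worker, which has its own fresh `COUNTER`.
    let seen = pool.install(|| COUNTER.with(|c| c.get()));
    assert_eq!(seen, 0);
}
```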
+ /// + /// # Warning: execution order + /// + /// If the current thread is part of a different thread pool, it will try to + /// keep busy while the `op` completes in its target pool, similar to + /// calling [`ThreadPool::yield_now()`] in a loop. Therefore, it may + /// potentially schedule other tasks to run on the current thread in the + /// meantime. For example + /// + /// ```rust + /// # use rustc_thread_pool as rayon; + /// fn main() { + /// rayon::ThreadPoolBuilder::new().num_threads(1).build_global().unwrap(); + /// let pool = rustc_thread_pool::ThreadPoolBuilder::default().build().unwrap(); + /// let do_it = || { + /// print!("one "); + /// pool.install(||{}); + /// print!("two "); + /// }; + /// rayon::join(|| do_it(), || do_it()); + /// } + /// ``` + /// + /// Since we configured just one thread in the global pool, one might + /// expect `do_it()` to run sequentially, producing: + /// + /// ```ascii + /// one two one two + /// ``` + /// + /// However each call to `install()` yields implicitly, allowing rayon to + /// run multiple instances of `do_it()` concurrently on the single, global + /// thread. The following output would be equally valid: + /// + /// ```ascii + /// one one two two + /// ``` + /// + /// # Panics + /// + /// If `op` should panic, that panic will be propagated. + /// + /// ## Using `install()` + /// + /// ```rust + /// # use rustc_thread_pool as rayon; + /// fn main() { + /// let pool = rayon::ThreadPoolBuilder::new().num_threads(8).build().unwrap(); + /// let n = pool.install(|| fib(20)); + /// println!("{}", n); + /// } + /// + /// fn fib(n: usize) -> usize { + /// if n == 0 || n == 1 { + /// return n; + /// } + /// let (a, b) = rayon::join(|| fib(n - 1), || fib(n - 2)); // runs inside of `pool` + /// return a + b; + /// } + /// ``` + pub fn install(&self, op: OP) -> R + where + OP: FnOnce() -> R + Send, + R: Send, + { + self.registry.in_worker(|_, _| op()) + } + + /// Executes `op` within every thread in the threadpool. Any attempts to use + /// `join`, `scope`, or parallel iterators will then operate within that + /// threadpool. + /// + /// Broadcasts are executed on each thread after they have exhausted their + /// local work queue, before they attempt work-stealing from other threads. + /// The goal of that strategy is to run everywhere in a timely manner + /// *without* being too disruptive to current work. There may be alternative + /// broadcast styles added in the future for more or less aggressive + /// injection, if the need arises. + /// + /// # Warning: thread-local data + /// + /// Because `op` is executing within the Rayon thread-pool, + /// thread-local data from the current thread will not be + /// accessible. + /// + /// # Panics + /// + /// If `op` should panic on one or more threads, exactly one panic + /// will be propagated, only after all threads have completed + /// (or panicked) their own `op`. + /// + /// # Examples + /// + /// ``` + /// # use rustc_thread_pool as rayon; + /// use std::sync::atomic::{AtomicUsize, Ordering}; + /// + /// fn main() { + /// let pool = rayon::ThreadPoolBuilder::new().num_threads(5).build().unwrap(); + /// + /// // The argument gives context, including the index of each thread. 
+ /// let v: Vec = pool.broadcast(|ctx| ctx.index() * ctx.index()); + /// assert_eq!(v, &[0, 1, 4, 9, 16]); + /// + /// // The closure can reference the local stack + /// let count = AtomicUsize::new(0); + /// pool.broadcast(|_| count.fetch_add(1, Ordering::Relaxed)); + /// assert_eq!(count.into_inner(), 5); + /// } + /// ``` + pub fn broadcast(&self, op: OP) -> Vec + where + OP: Fn(BroadcastContext<'_>) -> R + Sync, + R: Send, + { + // We assert that `self.registry` has not terminated. + unsafe { broadcast::broadcast_in(op, &self.registry) } + } + + /// Returns the (current) number of threads in the thread pool. + /// + /// # Future compatibility note + /// + /// Note that unless this thread-pool was created with a + /// [`ThreadPoolBuilder`] that specifies the number of threads, + /// then this number may vary over time in future versions (see [the + /// `num_threads()` method for details][snt]). + /// + /// [snt]: struct.ThreadPoolBuilder.html#method.num_threads + /// [`ThreadPoolBuilder`]: struct.ThreadPoolBuilder.html + #[inline] + pub fn current_num_threads(&self) -> usize { + self.registry.num_threads() + } + + /// If called from a Rayon worker thread in this thread-pool, + /// returns the index of that thread; if not called from a Rayon + /// thread, or called from a Rayon thread that belongs to a + /// different thread-pool, returns `None`. + /// + /// The index for a given thread will not change over the thread's + /// lifetime. However, multiple threads may share the same index if + /// they are in distinct thread-pools. + /// + /// # Future compatibility note + /// + /// Currently, every thread-pool (including the global + /// thread-pool) has a fixed number of threads, but this may + /// change in future Rayon versions (see [the `num_threads()` method + /// for details][snt]). In that case, the index for a + /// thread would not change during its lifetime, but thread + /// indices may wind up being reused if threads are terminated and + /// restarted. + /// + /// [snt]: struct.ThreadPoolBuilder.html#method.num_threads + #[inline] + pub fn current_thread_index(&self) -> Option { + let curr = self.registry.current_thread()?; + Some(curr.index()) + } + + /// Returns true if the current worker thread currently has "local + /// tasks" pending. This can be useful as part of a heuristic for + /// deciding whether to spawn a new task or execute code on the + /// current thread, particularly in breadth-first + /// schedulers. However, keep in mind that this is an inherently + /// racy check, as other worker threads may be actively "stealing" + /// tasks from our local deque. + /// + /// **Background:** Rayon's uses a [work-stealing] scheduler. The + /// key idea is that each thread has its own [deque] of + /// tasks. Whenever a new task is spawned -- whether through + /// `join()`, `Scope::spawn()`, or some other means -- that new + /// task is pushed onto the thread's *local* deque. Worker threads + /// have a preference for executing their own tasks; if however + /// they run out of tasks, they will go try to "steal" tasks from + /// other threads. This function therefore has an inherent race + /// with other active worker threads, which may be removing items + /// from the local deque. 
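As a sketch of the heuristic described above (purely illustrative, and racy by design):

```rust
use rustc_thread_pool::ThreadPoolBuilder;

fn main() {
    let pool = ThreadPoolBuilder::new().num_threads(4).build().unwrap();

    pool.install(|| {
        // Both queries return `Some(..)` only on this pool's own workers.
        assert!(pool.current_thread_index().is_some());

        // Racy heuristic: only push extra work if the local deque looks empty.
        if pool.current_thread_has_pending_tasks() == Some(false) {
            rustc_thread_pool::spawn(|| { /* extra background work */ });
        }
    });

    // Outside the pool, neither query applies.
    assert_eq!(pool.current_thread_index(), None);
}
```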
+ /// + /// [work-stealing]: https://en.wikipedia.org/wiki/Work_stealing + /// [deque]: https://en.wikipedia.org/wiki/Double-ended_queue + #[inline] + pub fn current_thread_has_pending_tasks(&self) -> Option { + let curr = self.registry.current_thread()?; + Some(!curr.local_deque_is_empty()) + } + + /// Execute `oper_a` and `oper_b` in the thread-pool and return + /// the results. Equivalent to `self.install(|| join(oper_a, + /// oper_b))`. + pub fn join(&self, oper_a: A, oper_b: B) -> (RA, RB) + where + A: FnOnce() -> RA + Send, + B: FnOnce() -> RB + Send, + RA: Send, + RB: Send, + { + self.install(|| join(oper_a, oper_b)) + } + + /// Creates a scope that executes within this thread-pool. + /// Equivalent to `self.install(|| scope(...))`. + /// + /// See also: [the `scope()` function][scope]. + /// + /// [scope]: fn.scope.html + pub fn scope<'scope, OP, R>(&self, op: OP) -> R + where + OP: FnOnce(&Scope<'scope>) -> R + Send, + R: Send, + { + self.install(|| scope(op)) + } + + /// Creates a scope that executes within this thread-pool. + /// Spawns from the same thread are prioritized in relative FIFO order. + /// Equivalent to `self.install(|| scope_fifo(...))`. + /// + /// See also: [the `scope_fifo()` function][scope_fifo]. + /// + /// [scope_fifo]: fn.scope_fifo.html + pub fn scope_fifo<'scope, OP, R>(&self, op: OP) -> R + where + OP: FnOnce(&ScopeFifo<'scope>) -> R + Send, + R: Send, + { + self.install(|| scope_fifo(op)) + } + + /// Creates a scope that spawns work into this thread-pool. + /// + /// See also: [the `in_place_scope()` function][in_place_scope]. + /// + /// [in_place_scope]: fn.in_place_scope.html + pub fn in_place_scope<'scope, OP, R>(&self, op: OP) -> R + where + OP: FnOnce(&Scope<'scope>) -> R, + { + do_in_place_scope(Some(&self.registry), op) + } + + /// Creates a scope that spawns work into this thread-pool in FIFO order. + /// + /// See also: [the `in_place_scope_fifo()` function][in_place_scope_fifo]. + /// + /// [in_place_scope_fifo]: fn.in_place_scope_fifo.html + pub fn in_place_scope_fifo<'scope, OP, R>(&self, op: OP) -> R + where + OP: FnOnce(&ScopeFifo<'scope>) -> R, + { + do_in_place_scope_fifo(Some(&self.registry), op) + } + + /// Spawns an asynchronous task in this thread-pool. This task will + /// run in the implicit, global scope, which means that it may outlast + /// the current stack frame -- therefore, it cannot capture any references + /// onto the stack (you will likely need a `move` closure). + /// + /// See also: [the `spawn()` function defined on scopes][spawn]. + /// + /// [spawn]: struct.Scope.html#method.spawn + pub fn spawn(&self, op: OP) + where + OP: FnOnce() + Send + 'static, + { + // We assert that `self.registry` has not terminated. + unsafe { spawn::spawn_in(op, &self.registry) } + } + + /// Spawns an asynchronous task in this thread-pool. This task will + /// run in the implicit, global scope, which means that it may outlast + /// the current stack frame -- therefore, it cannot capture any references + /// onto the stack (you will likely need a `move` closure). + /// + /// See also: [the `spawn_fifo()` function defined on scopes][spawn_fifo]. + /// + /// [spawn_fifo]: struct.ScopeFifo.html#method.spawn_fifo + pub fn spawn_fifo(&self, op: OP) + where + OP: FnOnce() + Send + 'static, + { + // We assert that `self.registry` has not terminated. + unsafe { spawn::spawn_fifo_in(op, &self.registry) } + } + + /// Spawns an asynchronous task on every thread in this thread-pool. 
This task + /// will run in the implicit, global scope, which means that it may outlast the + /// current stack frame -- therefore, it cannot capture any references onto the + /// stack (you will likely need a `move` closure). + pub fn spawn_broadcast(&self, op: OP) + where + OP: Fn(BroadcastContext<'_>) + Send + Sync + 'static, + { + // We assert that `self.registry` has not terminated. + unsafe { broadcast::spawn_broadcast_in(op, &self.registry) } + } + + /// Cooperatively yields execution to Rayon. + /// + /// This is similar to the general [`yield_now()`], but only if the current + /// thread is part of *this* thread pool. + /// + /// Returns `Some(Yield::Executed)` if anything was executed, `Some(Yield::Idle)` if + /// nothing was available, or `None` if the current thread is not part this pool. + pub fn yield_now(&self) -> Option { + let curr = self.registry.current_thread()?; + Some(curr.yield_now()) + } + + /// Cooperatively yields execution to local Rayon work. + /// + /// This is similar to the general [`yield_local()`], but only if the current + /// thread is part of *this* thread pool. + /// + /// Returns `Some(Yield::Executed)` if anything was executed, `Some(Yield::Idle)` if + /// nothing was available, or `None` if the current thread is not part this pool. + pub fn yield_local(&self) -> Option { + let curr = self.registry.current_thread()?; + Some(curr.yield_local()) + } + + pub(crate) fn wait_until_stopped(self) { + let registry = Arc::clone(&self.registry); + drop(self); + registry.wait_until_stopped(); + } +} + +impl Drop for ThreadPool { + fn drop(&mut self) { + self.registry.terminate(); + } +} + +impl fmt::Debug for ThreadPool { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("ThreadPool") + .field("num_threads", &self.current_num_threads()) + .field("id", &self.registry.id()) + .finish() + } +} + +/// If called from a Rayon worker thread, returns the index of that +/// thread within its current pool; if not called from a Rayon thread, +/// returns `None`. +/// +/// The index for a given thread will not change over the thread's +/// lifetime. However, multiple threads may share the same index if +/// they are in distinct thread-pools. +/// +/// See also: [the `ThreadPool::current_thread_index()` method]. +/// +/// [m]: struct.ThreadPool.html#method.current_thread_index +/// +/// # Future compatibility note +/// +/// Currently, every thread-pool (including the global +/// thread-pool) has a fixed number of threads, but this may +/// change in future Rayon versions (see [the `num_threads()` method +/// for details][snt]). In that case, the index for a +/// thread would not change during its lifetime, but thread +/// indices may wind up being reused if threads are terminated and +/// restarted. +/// +/// [snt]: struct.ThreadPoolBuilder.html#method.num_threads +#[inline] +pub fn current_thread_index() -> Option { + unsafe { + let curr = WorkerThread::current().as_ref()?; + Some(curr.index()) + } +} + +/// If called from a Rayon worker thread, indicates whether that +/// thread's local deque still has pending tasks. Otherwise, returns +/// `None`. For more information, see [the +/// `ThreadPool::current_thread_has_pending_tasks()` method][m]. +/// +/// [m]: struct.ThreadPool.html#method.current_thread_has_pending_tasks +#[inline] +pub fn current_thread_has_pending_tasks() -> Option { + unsafe { + let curr = WorkerThread::current().as_ref()?; + Some(!curr.local_deque_is_empty()) + } +} + +/// Cooperatively yields execution to Rayon. 
+/// +/// If the current thread is part of a rayon thread pool, this looks for a +/// single unit of pending work in the pool, then executes it. Completion of +/// that work might include nested work or further work stealing. +/// +/// This is similar to [`std::thread::yield_now()`], but does not literally make +/// that call. If you are implementing a polling loop, you may want to also +/// yield to the OS scheduler yourself if no Rayon work was found. +/// +/// Returns `Some(Yield::Executed)` if anything was executed, `Some(Yield::Idle)` if +/// nothing was available, or `None` if this thread is not part of any pool at all. +pub fn yield_now() -> Option { + unsafe { + let thread = WorkerThread::current().as_ref()?; + Some(thread.yield_now()) + } +} + +/// Cooperatively yields execution to local Rayon work. +/// +/// If the current thread is part of a rayon thread pool, this looks for a +/// single unit of pending work in this thread's queue, then executes it. +/// Completion of that work might include nested work or further work stealing. +/// +/// This is similar to [`yield_now()`], but does not steal from other threads. +/// +/// Returns `Some(Yield::Executed)` if anything was executed, `Some(Yield::Idle)` if +/// nothing was available, or `None` if this thread is not part of any pool at all. +pub fn yield_local() -> Option { + unsafe { + let thread = WorkerThread::current().as_ref()?; + Some(thread.yield_local()) + } +} + +/// Result of [`yield_now()`] or [`yield_local()`]. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Yield { + /// Work was found and executed. + Executed, + /// No available work was found. + Idle, +} diff --git a/compiler/rustc_thread_pool/src/thread_pool/tests.rs b/compiler/rustc_thread_pool/src/thread_pool/tests.rs new file mode 100644 index 0000000000000..42c99565088af --- /dev/null +++ b/compiler/rustc_thread_pool/src/thread_pool/tests.rs @@ -0,0 +1,416 @@ +#![cfg(test)] + +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::mpsc::channel; +use std::sync::{Arc, Mutex}; + +use crate::{Scope, ScopeFifo, ThreadPool, ThreadPoolBuilder, join}; + +#[test] +#[should_panic(expected = "Hello, world!")] +fn panic_propagate() { + let thread_pool = ThreadPoolBuilder::new().build().unwrap(); + thread_pool.install(|| { + panic!("Hello, world!"); + }); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn workers_stop() { + let registry; + + { + // once we exit this block, thread-pool will be dropped + let thread_pool = ThreadPoolBuilder::new().num_threads(22).build().unwrap(); + registry = thread_pool.install(|| { + // do some work on these threads + join_a_lot(22); + + Arc::clone(&thread_pool.registry) + }); + assert_eq!(registry.num_threads(), 22); + } + + // once thread-pool is dropped, registry should terminate, which + // should lead to worker threads stopping + registry.wait_until_stopped(); +} + +fn join_a_lot(n: usize) { + if n > 0 { + join(|| join_a_lot(n - 1), || join_a_lot(n - 1)); + } +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn sleeper_stop() { + use std::{thread, time}; + + let registry; + + { + // once we exit this block, thread-pool will be dropped + let thread_pool = ThreadPoolBuilder::new().num_threads(22).build().unwrap(); + registry = Arc::clone(&thread_pool.registry); + + // Give time for at least some of the thread pool to fall asleep. 
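For the polling-loop case mentioned in the `yield_now()` docs above, a rough sketch (the iteration bound is arbitrary, and `Yield`/`yield_now` are assumed to be re-exported at the crate root as in upstream rayon-core):

```rust
use rustc_thread_pool::{ThreadPoolBuilder, Yield};

fn main() {
    let pool = ThreadPoolBuilder::new().num_threads(2).build().unwrap();

    pool.install(|| {
        // While waiting on something external, keep helping the pool; fall
        // back to the OS scheduler whenever no Rayon work is available.
        for _ in 0..1_000 {
            match rustc_thread_pool::yield_now() {
                Some(Yield::Executed) => {}
                Some(Yield::Idle) | None => std::thread::yield_now(),
            }
        }
    });
}
```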
+ thread::sleep(time::Duration::from_secs(1)); + } + + // once thread-pool is dropped, registry should terminate, which + // should lead to worker threads stopping + registry.wait_until_stopped(); +} + +/// Creates a start/exit handler that increments an atomic counter. +fn count_handler() -> (Arc, impl Fn(usize)) { + let count = Arc::new(AtomicUsize::new(0)); + (Arc::clone(&count), move |_| { + count.fetch_add(1, Ordering::SeqCst); + }) +} + +/// Wait until a counter is no longer shared, then return its value. +fn wait_for_counter(mut counter: Arc) -> usize { + use std::{thread, time}; + + for _ in 0..60 { + counter = match Arc::try_unwrap(counter) { + Ok(counter) => return counter.into_inner(), + Err(counter) => { + thread::sleep(time::Duration::from_secs(1)); + counter + } + }; + } + + // That's too long! + panic!("Counter is still shared!"); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn failed_thread_stack() { + // Note: we first tried to force failure with a `usize::MAX` stack, but + // macOS and Windows weren't fazed, or at least didn't fail the way we want. + // They work with `isize::MAX`, but 32-bit platforms may feasibly allocate a + // 2GB stack, so it might not fail until the second thread. + let stack_size = ::std::isize::MAX as usize; + + let (start_count, start_handler) = count_handler(); + let (exit_count, exit_handler) = count_handler(); + let builder = ThreadPoolBuilder::new() + .num_threads(10) + .stack_size(stack_size) + .start_handler(start_handler) + .exit_handler(exit_handler); + + let pool = builder.build(); + assert!(pool.is_err(), "thread stack should have failed!"); + + // With such a huge stack, 64-bit will probably fail on the first thread; + // 32-bit might manage the first 2GB, but certainly fail the second. + let start_count = wait_for_counter(start_count); + assert!(start_count <= 1); + assert_eq!(start_count, wait_for_counter(exit_count)); +} + +#[test] +#[cfg_attr(not(panic = "unwind"), ignore)] +fn panic_thread_name() { + let (start_count, start_handler) = count_handler(); + let (exit_count, exit_handler) = count_handler(); + let builder = ThreadPoolBuilder::new() + .num_threads(10) + .start_handler(start_handler) + .exit_handler(exit_handler) + .thread_name(|i| { + if i >= 5 { + panic!(); + } + format!("panic_thread_name#{}", i) + }); + + let pool = crate::unwind::halt_unwinding(|| builder.build()); + assert!(pool.is_err(), "thread-name panic should propagate!"); + + // Assuming they're created in order, threads 0 through 4 should have + // been started already, and then terminated by the panic. + assert_eq!(5, wait_for_counter(start_count)); + assert_eq!(5, wait_for_counter(exit_count)); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn self_install() { + let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap(); + + // If the inner `install` blocks, then nothing will actually run it! 
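The `panic_thread_name` and `failed_thread_stack` tests exercise the `thread_name` and `stack_size` builder options; ordinary use is simply (the name prefix here is made up):

```rust
use rustc_thread_pool::ThreadPoolBuilder;

fn main() {
    let pool = ThreadPoolBuilder::new()
        .num_threads(4)
        .thread_name(|i| format!("rustc-worker-{i}"))
        .stack_size(8 * 1024 * 1024) // 8 MiB per worker
        .build()
        .unwrap();

    let name = pool.install(|| std::thread::current().name().map(String::from));
    assert!(name.unwrap().starts_with("rustc-worker-"));
}
```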
+ assert!(pool.install(|| pool.install(|| true))); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn mutual_install() { + let pool1 = ThreadPoolBuilder::new().num_threads(1).build().unwrap(); + let pool2 = ThreadPoolBuilder::new().num_threads(1).build().unwrap(); + + let ok = pool1.install(|| { + // This creates a dependency from `pool1` -> `pool2` + pool2.install(|| { + // This creates a dependency from `pool2` -> `pool1` + pool1.install(|| { + // If they blocked on inter-pool installs, there would be no + // threads left to run this! + true + }) + }) + }); + assert!(ok); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn mutual_install_sleepy() { + use std::{thread, time}; + + let pool1 = ThreadPoolBuilder::new().num_threads(1).build().unwrap(); + let pool2 = ThreadPoolBuilder::new().num_threads(1).build().unwrap(); + + let ok = pool1.install(|| { + // This creates a dependency from `pool1` -> `pool2` + pool2.install(|| { + // Give `pool1` time to fall asleep. + thread::sleep(time::Duration::from_secs(1)); + + // This creates a dependency from `pool2` -> `pool1` + pool1.install(|| { + // Give `pool2` time to fall asleep. + thread::sleep(time::Duration::from_secs(1)); + + // If they blocked on inter-pool installs, there would be no + // threads left to run this! + true + }) + }) + }); + assert!(ok); +} + +#[test] +#[allow(deprecated)] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn check_thread_pool_new() { + let pool = ThreadPool::new(crate::Configuration::new().num_threads(22)).unwrap(); + assert_eq!(pool.current_num_threads(), 22); +} + +macro_rules! test_scope_order { + ($scope:ident => $spawn:ident) => {{ + let builder = ThreadPoolBuilder::new().num_threads(1); + let pool = builder.build().unwrap(); + pool.install(|| { + let vec = Mutex::new(vec![]); + pool.$scope(|scope| { + let vec = &vec; + for i in 0..10 { + scope.$spawn(move |_| { + vec.lock().unwrap().push(i); + }); + } + }); + vec.into_inner().unwrap() + }) + }}; +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn scope_lifo_order() { + let vec = test_scope_order!(scope => spawn); + let expected: Vec = (0..10).rev().collect(); // LIFO -> reversed + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn scope_fifo_order() { + let vec = test_scope_order!(scope_fifo => spawn_fifo); + let expected: Vec = (0..10).collect(); // FIFO -> natural order + assert_eq!(vec, expected); +} + +macro_rules! 
test_spawn_order { + ($spawn:ident) => {{ + let builder = ThreadPoolBuilder::new().num_threads(1); + let pool = &builder.build().unwrap(); + let (tx, rx) = channel(); + pool.install(move || { + for i in 0..10 { + let tx = tx.clone(); + pool.$spawn(move || { + tx.send(i).unwrap(); + }); + } + }); + rx.iter().collect::>() + }}; +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn spawn_lifo_order() { + let vec = test_spawn_order!(spawn); + let expected: Vec = (0..10).rev().collect(); // LIFO -> reversed + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn spawn_fifo_order() { + let vec = test_spawn_order!(spawn_fifo); + let expected: Vec = (0..10).collect(); // FIFO -> natural order + assert_eq!(vec, expected); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn nested_scopes() { + // Create matching scopes for every thread pool. + fn nest<'scope, OP>(pools: &[ThreadPool], scopes: Vec<&Scope<'scope>>, op: OP) + where + OP: FnOnce(&[&Scope<'scope>]) + Send, + { + if let Some((pool, tail)) = pools.split_first() { + pool.scope(move |s| { + // This move reduces the reference lifetimes by variance to match s, + // but the actual scopes are still tied to the invariant 'scope. + let mut scopes = scopes; + scopes.push(s); + nest(tail, scopes, op) + }) + } else { + (op)(&scopes) + } + } + + let pools: Vec<_> = + (0..10).map(|_| ThreadPoolBuilder::new().num_threads(1).build().unwrap()).collect(); + + let counter = AtomicUsize::new(0); + nest(&pools, vec![], |scopes| { + for &s in scopes { + s.spawn(|_| { + // Our 'scope lets us borrow the counter in every pool. + counter.fetch_add(1, Ordering::Relaxed); + }); + } + }); + assert_eq!(counter.into_inner(), pools.len()); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn nested_fifo_scopes() { + // Create matching fifo scopes for every thread pool. + fn nest<'scope, OP>(pools: &[ThreadPool], scopes: Vec<&ScopeFifo<'scope>>, op: OP) + where + OP: FnOnce(&[&ScopeFifo<'scope>]) + Send, + { + if let Some((pool, tail)) = pools.split_first() { + pool.scope_fifo(move |s| { + // This move reduces the reference lifetimes by variance to match s, + // but the actual scopes are still tied to the invariant 'scope. + let mut scopes = scopes; + scopes.push(s); + nest(tail, scopes, op) + }) + } else { + (op)(&scopes) + } + } + + let pools: Vec<_> = + (0..10).map(|_| ThreadPoolBuilder::new().num_threads(1).build().unwrap()).collect(); + + let counter = AtomicUsize::new(0); + nest(&pools, vec![], |scopes| { + for &s in scopes { + s.spawn_fifo(|_| { + // Our 'scope lets us borrow the counter in every pool. + counter.fetch_add(1, Ordering::Relaxed); + }); + } + }); + assert_eq!(counter.into_inner(), pools.len()); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn in_place_scope_no_deadlock() { + let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap(); + let (tx, rx) = channel(); + let rx_ref = ℞ + pool.in_place_scope(move |s| { + // With regular scopes this closure would never run because this scope op + // itself would block the only worker thread. 
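The deadlock-freedom checked here comes from the scope body running on the calling thread, with only the spawned closures going to the pool; a minimal sketch of that behaviour:

```rust
use rustc_thread_pool::ThreadPoolBuilder;

fn main() {
    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();

    let mut results = vec![0usize; 4];
    pool.in_place_scope(|s| {
        // This closure runs right here on the calling thread, so it does not
        // occupy the pool's single worker; only the spawns below do.
        for (i, slot) in results.iter_mut().enumerate() {
            s.spawn(move |_| *slot = i * i);
        }
    });
    // The scope waits for all spawned work before returning.
    assert_eq!(results, vec![0, 1, 4, 9]);
}
```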
+ s.spawn(move |_| { + tx.send(()).unwrap(); + }); + rx_ref.recv().unwrap(); + }); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn in_place_scope_fifo_no_deadlock() { + let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap(); + let (tx, rx) = channel(); + let rx_ref = ℞ + pool.in_place_scope_fifo(move |s| { + // With regular scopes this closure would never run because this scope op + // itself would block the only worker thread. + s.spawn_fifo(move |_| { + tx.send(()).unwrap(); + }); + rx_ref.recv().unwrap(); + }); +} + +#[test] +fn yield_now_to_spawn() { + let (tx, rx) = channel(); + + // Queue a regular spawn. + crate::spawn(move || tx.send(22).unwrap()); + + // The single-threaded fallback mode (for wasm etc.) won't + // get a chance to run the spawn if we never yield to it. + crate::registry::in_worker(move |_, _| { + crate::yield_now(); + }); + + // The spawn **must** have started by now, but we still might have to wait + // for it to finish if a different thread stole it first. + assert_eq!(22, rx.recv().unwrap()); +} + +#[test] +fn yield_local_to_spawn() { + let (tx, rx) = channel(); + + // Queue a regular spawn. + crate::spawn(move || tx.send(22).unwrap()); + + // The single-threaded fallback mode (for wasm etc.) won't + // get a chance to run the spawn if we never yield to it. + crate::registry::in_worker(move |_, _| { + crate::yield_local(); + }); + + // The spawn **must** have started by now, but we still might have to wait + // for it to finish if a different thread stole it first. + assert_eq!(22, rx.recv().unwrap()); +} diff --git a/compiler/rustc_thread_pool/src/tlv.rs b/compiler/rustc_thread_pool/src/tlv.rs new file mode 100644 index 0000000000000..b5f63479e2fe9 --- /dev/null +++ b/compiler/rustc_thread_pool/src/tlv.rs @@ -0,0 +1,32 @@ +//! Allows access to the Rayon's thread local value +//! which is preserved when moving jobs across threads + +use std::cell::Cell; +use std::ptr; + +thread_local!(pub static TLV: Cell<*const ()> = const { Cell::new(ptr::null()) }); + +#[derive(Copy, Clone)] +pub(crate) struct Tlv(pub(crate) *const ()); + +impl Tlv { + #[inline] + pub(crate) fn null() -> Self { + Self(ptr::null()) + } +} + +unsafe impl Sync for Tlv {} +unsafe impl Send for Tlv {} + +/// Sets the current thread-local value +#[inline] +pub(crate) fn set(value: Tlv) { + TLV.with(|tlv| tlv.set(value.0)); +} + +/// Returns the current thread-local value +#[inline] +pub(crate) fn get() -> Tlv { + TLV.with(|tlv| Tlv(tlv.get())) +} diff --git a/compiler/rustc_thread_pool/src/unwind.rs b/compiler/rustc_thread_pool/src/unwind.rs new file mode 100644 index 0000000000000..9671fa5782147 --- /dev/null +++ b/compiler/rustc_thread_pool/src/unwind.rs @@ -0,0 +1,31 @@ +//! Package up unwind recovery. Note that if you are in some sensitive +//! place, you can use the `AbortIfPanic` helper to protect against +//! accidental panics in the rayon code itself. + +use std::any::Any; +use std::panic::{self, AssertUnwindSafe}; +use std::thread; + +/// Executes `f` and captures any panic, translating that panic into a +/// `Err` result. The assumption is that any panic will be propagated +/// later with `resume_unwinding`, and hence `f` can be treated as +/// exception safe. +pub(super) fn halt_unwinding(func: F) -> thread::Result +where + F: FnOnce() -> R, +{ + panic::catch_unwind(AssertUnwindSafe(func)) +} + +pub(super) fn resume_unwinding(payload: Box) -> ! 
{ + panic::resume_unwind(payload) +} + +pub(super) struct AbortIfPanic; + +impl Drop for AbortIfPanic { + fn drop(&mut self) { + eprintln!("Rayon: detected unexpected panic; aborting"); + ::std::process::abort(); + } +} diff --git a/compiler/rustc_thread_pool/src/worker_local.rs b/compiler/rustc_thread_pool/src/worker_local.rs new file mode 100644 index 0000000000000..d108c91f9ee53 --- /dev/null +++ b/compiler/rustc_thread_pool/src/worker_local.rs @@ -0,0 +1,75 @@ +use std::fmt; +use std::ops::Deref; +use std::sync::Arc; + +use crate::registry::{Registry, WorkerThread}; + +#[repr(align(64))] +#[derive(Debug)] +struct CacheAligned(T); + +/// Holds worker-locals values for each thread in a thread pool. +/// You can only access the worker local value through the Deref impl +/// on the thread pool it was constructed on. It will panic otherwise +pub struct WorkerLocal { + locals: Vec>, + registry: Arc, +} + +/// We prevent concurrent access to the underlying value in the +/// Deref impl, thus any values safe to send across threads can +/// be used with WorkerLocal. +unsafe impl Sync for WorkerLocal {} + +impl WorkerLocal { + /// Creates a new worker local where the `initial` closure computes the + /// value this worker local should take for each thread in the thread pool. + #[inline] + pub fn new T>(mut initial: F) -> WorkerLocal { + let registry = Registry::current(); + WorkerLocal { + locals: (0..registry.num_threads()).map(|i| CacheAligned(initial(i))).collect(), + registry, + } + } + + /// Returns the worker-local value for each thread + #[inline] + pub fn into_inner(self) -> Vec { + self.locals.into_iter().map(|c| c.0).collect() + } + + fn current(&self) -> &T { + unsafe { + let worker_thread = WorkerThread::current(); + if worker_thread.is_null() + || &*(*worker_thread).registry as *const _ != &*self.registry as *const _ + { + panic!("WorkerLocal can only be used on the thread pool it was created on") + } + &self.locals[(*worker_thread).index].0 + } + } +} + +impl WorkerLocal> { + /// Joins the elements of all the worker locals into one Vec + pub fn join(self) -> Vec { + self.into_inner().into_iter().flat_map(|v| v).collect() + } +} + +impl fmt::Debug for WorkerLocal { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("WorkerLocal").field("registry", &self.registry.id()).finish() + } +} + +impl Deref for WorkerLocal { + type Target = T; + + #[inline(always)] + fn deref(&self) -> &T { + self.current() + } +} diff --git a/compiler/rustc_thread_pool/tests/double_init_fail.rs b/compiler/rustc_thread_pool/tests/double_init_fail.rs new file mode 100644 index 0000000000000..85e509518d433 --- /dev/null +++ b/compiler/rustc_thread_pool/tests/double_init_fail.rs @@ -0,0 +1,15 @@ +#![allow(unused_crate_dependencies)] + +use std::error::Error; + +use rustc_thread_pool::ThreadPoolBuilder; + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn double_init_fail() { + let result1 = ThreadPoolBuilder::new().build_global(); + assert!(result1.is_ok()); + let err = ThreadPoolBuilder::new().build_global().unwrap_err(); + assert!(err.source().is_none()); + assert_eq!(err.to_string(), "The global thread pool has already been initialized.",); +} diff --git a/compiler/rustc_thread_pool/tests/init_zero_threads.rs b/compiler/rustc_thread_pool/tests/init_zero_threads.rs new file mode 100644 index 0000000000000..261493fcb7b7a --- /dev/null +++ b/compiler/rustc_thread_pool/tests/init_zero_threads.rs @@ -0,0 +1,9 @@ +#![allow(unused_crate_dependencies)] + 
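The `WorkerLocal` type added in `worker_local.rs` above keeps one slot per worker of the pool it was created on; a minimal sketch, assuming it is re-exported from the crate root like the other public types:

```rust
use rustc_thread_pool::{ThreadPoolBuilder, WorkerLocal};

fn main() {
    let pool = ThreadPoolBuilder::new().num_threads(4).build().unwrap();

    let locals = pool.install(|| {
        // One slot per worker, initialised from the worker index.
        let local = WorkerLocal::new(|i| i * 10);
        // `Deref` resolves to the current worker's slot; it panics if used
        // from a thread outside the pool the value was created on.
        assert_eq!(*local % 10, 0);
        local.into_inner()
    });
    assert_eq!(locals.len(), pool.current_num_threads());
}
```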
+use rustc_thread_pool::ThreadPoolBuilder; + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn init_zero_threads() { + ThreadPoolBuilder::new().num_threads(0).build_global().unwrap(); +} diff --git a/compiler/rustc_thread_pool/tests/scope_join.rs b/compiler/rustc_thread_pool/tests/scope_join.rs new file mode 100644 index 0000000000000..83468da81c013 --- /dev/null +++ b/compiler/rustc_thread_pool/tests/scope_join.rs @@ -0,0 +1,47 @@ +#![allow(unused_crate_dependencies)] + +/// Test that one can emulate join with `scope`: +fn pseudo_join(f: F, g: G) +where + F: FnOnce() + Send, + G: FnOnce() + Send, +{ + rustc_thread_pool::scope(|s| { + s.spawn(|_| g()); + f(); + }); +} + +fn quick_sort(v: &mut [T]) { + if v.len() <= 1 { + return; + } + + let mid = partition(v); + let (lo, hi) = v.split_at_mut(mid); + pseudo_join(|| quick_sort(lo), || quick_sort(hi)); +} + +fn partition(v: &mut [T]) -> usize { + let pivot = v.len() - 1; + let mut i = 0; + for j in 0..pivot { + if v[j] <= v[pivot] { + v.swap(i, j); + i += 1; + } + } + v.swap(i, pivot); + i +} + +fn is_sorted(v: &[T]) -> bool { + (1..v.len()).all(|i| v[i - 1] <= v[i]) +} + +#[test] +fn scope_join() { + let mut v: Vec = (0..256).rev().collect(); + quick_sort(&mut v); + assert!(is_sorted(&v)); +} diff --git a/compiler/rustc_thread_pool/tests/scoped_threadpool.rs b/compiler/rustc_thread_pool/tests/scoped_threadpool.rs new file mode 100644 index 0000000000000..295da650e8805 --- /dev/null +++ b/compiler/rustc_thread_pool/tests/scoped_threadpool.rs @@ -0,0 +1,99 @@ +#![allow(unused_crate_dependencies)] + +use crossbeam_utils::thread; +use rustc_thread_pool::ThreadPoolBuilder; + +#[derive(PartialEq, Eq, Debug)] +struct Local(i32); + +scoped_tls::scoped_thread_local!(static LOCAL: Local); + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn missing_scoped_tls() { + LOCAL.set(&Local(42), || { + let pool = ThreadPoolBuilder::new().build().expect("thread pool created"); + + // `LOCAL` is not set in the pool. + pool.install(|| { + assert!(!LOCAL.is_set()); + }); + }); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn spawn_scoped_tls_threadpool() { + LOCAL.set(&Local(42), || { + LOCAL.with(|x| { + thread::scope(|scope| { + let pool = ThreadPoolBuilder::new() + .spawn_handler(move |thread| { + scope + .builder() + .spawn(move |_| { + // Borrow the same local value in the thread pool. + LOCAL.set(x, || thread.run()) + }) + .map(|_| ()) + }) + .build() + .expect("thread pool created"); + + // The pool matches our local value. + pool.install(|| { + assert!(LOCAL.is_set()); + LOCAL.with(|y| { + assert_eq!(x, y); + }); + }); + + // If we change our local value, the pool is not affected. + LOCAL.set(&Local(-1), || { + pool.install(|| { + assert!(LOCAL.is_set()); + LOCAL.with(|y| { + assert_eq!(x, y); + }); + }); + }); + }) + .expect("scope threads ok"); + // `thread::scope` will wait for the threads to exit before returning. + }); + }); +} + +#[test] +#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)] +fn build_scoped_tls_threadpool() { + LOCAL.set(&Local(42), || { + LOCAL.with(|x| { + ThreadPoolBuilder::new() + .build_scoped( + move |thread| LOCAL.set(x, || thread.run()), + |pool| { + // The pool matches our local value. + pool.install(|| { + assert!(LOCAL.is_set()); + LOCAL.with(|y| { + assert_eq!(x, y); + }); + }); + + // If we change our local value, the pool is not affected. 
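`scope_join.rs` emulates `join` with a scope; in the other direction, `join` itself is the usual primitive for divide-and-conquer over borrowed data, for example (the threshold is arbitrary):

```rust
use rustc_thread_pool::join;

fn parallel_sum(v: &[u64]) -> u64 {
    if v.len() <= 1024 {
        return v.iter().sum();
    }
    let mid = v.len() / 2;
    let (lo, hi) = v.split_at(mid);
    // `join` may run the two halves in parallel and waits for both.
    let (a, b) = join(|| parallel_sum(lo), || parallel_sum(hi));
    a + b
}

fn main() {
    let v: Vec<u64> = (1..=10_000).collect();
    assert_eq!(parallel_sum(&v), 50_005_000);
}
```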
+ LOCAL.set(&Local(-1), || { + pool.install(|| { + assert!(LOCAL.is_set()); + LOCAL.with(|y| { + assert_eq!(x, y); + }); + }); + }); + }, + ) + .expect("thread pool created"); + // Internally, `std::thread::scope` will wait for the threads to exit before returning. + }); + }); +} diff --git a/compiler/rustc_thread_pool/tests/simple_panic.rs b/compiler/rustc_thread_pool/tests/simple_panic.rs new file mode 100644 index 0000000000000..b35b4d632d2b9 --- /dev/null +++ b/compiler/rustc_thread_pool/tests/simple_panic.rs @@ -0,0 +1,9 @@ +#![allow(unused_crate_dependencies)] + +use rustc_thread_pool::join; + +#[test] +#[should_panic(expected = "should panic")] +fn simple_panic() { + join(|| {}, || panic!("should panic")); +} diff --git a/compiler/rustc_thread_pool/tests/stack_overflow_crash.rs b/compiler/rustc_thread_pool/tests/stack_overflow_crash.rs new file mode 100644 index 0000000000000..805b6d8ee3f2f --- /dev/null +++ b/compiler/rustc_thread_pool/tests/stack_overflow_crash.rs @@ -0,0 +1,87 @@ +#![allow(unused_crate_dependencies)] + +use std::env; +#[cfg(target_os = "linux")] +use std::os::unix::process::ExitStatusExt; +use std::process::{Command, ExitStatus, Stdio}; + +use rustc_thread_pool::ThreadPoolBuilder; + +fn force_stack_overflow(depth: u32) { + let mut buffer = [0u8; 1024 * 1024]; + #[allow(clippy::incompatible_msrv)] + std::hint::black_box(&mut buffer); + if depth > 0 { + force_stack_overflow(depth - 1); + } +} + +#[cfg(unix)] +fn disable_core() { + unsafe { + libc::setrlimit(libc::RLIMIT_CORE, &libc::rlimit { rlim_cur: 0, rlim_max: 0 }); + } +} + +#[cfg(unix)] +fn overflow_code() -> Option { + None +} + +#[cfg(windows)] +fn overflow_code() -> Option { + use std::os::windows::process::ExitStatusExt; + + ExitStatus::from_raw(0xc00000fd /*STATUS_STACK_OVERFLOW*/).code() +} + +#[test] +#[cfg_attr(not(any(unix, windows)), ignore)] +fn stack_overflow_crash() { + // First check that the recursive call actually causes a stack overflow, + // and does not get optimized away. + let status = run_ignored("run_with_small_stack"); + assert!(!status.success()); + #[cfg(any(unix, windows))] + assert_eq!(status.code(), overflow_code()); + #[cfg(target_os = "linux")] + assert!(matches!(status.signal(), Some(libc::SIGABRT | libc::SIGSEGV))); + + // Now run with a larger stack and verify correct operation. 
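`build_scoped`, used by the scoped-TLS test above, wraps every worker in a caller-provided closure and only returns once the workers have exited; a stripped-down sketch without the TLS part:

```rust
use rustc_thread_pool::ThreadPoolBuilder;

fn main() {
    let sum = ThreadPoolBuilder::new()
        .num_threads(2)
        .build_scoped(
            // Wrapper: runs once on each worker thread; per-thread setup
            // would go around `thread.run()`.
            |thread| thread.run(),
            // Body: receives the pool; after it returns, `build_scoped`
            // waits for the worker threads to exit.
            |pool| pool.install(|| (1u32..=10).sum::<u32>()),
        )
        .unwrap();
    assert_eq!(sum, 55);
}
```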
+ let status = run_ignored("run_with_large_stack"); + assert_eq!(status.code(), Some(0)); + #[cfg(target_os = "linux")] + assert_eq!(status.signal(), None); +} + +fn run_ignored(test: &str) -> ExitStatus { + Command::new(env::current_exe().unwrap()) + .arg("--ignored") + .arg("--exact") + .arg(test) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .unwrap() +} + +#[test] +#[ignore] +fn run_with_small_stack() { + run_with_stack(8); +} + +#[test] +#[ignore] +fn run_with_large_stack() { + run_with_stack(48); +} + +fn run_with_stack(stack_size_in_mb: usize) { + let pool = ThreadPoolBuilder::new().stack_size(stack_size_in_mb * 1024 * 1024).build().unwrap(); + pool.install(|| { + #[cfg(unix)] + disable_core(); + force_stack_overflow(32); + }); +} diff --git a/library/core/src/ascii.rs b/library/core/src/ascii.rs index 5b3711b4071ab..d3c6c046e717f 100644 --- a/library/core/src/ascii.rs +++ b/library/core/src/ascii.rs @@ -9,9 +9,10 @@ #![stable(feature = "core_ascii", since = "1.26.0")] +use crate::escape::{AlwaysEscaped, EscapeIterInner}; +use crate::fmt; use crate::iter::FusedIterator; use crate::num::NonZero; -use crate::{escape, fmt}; mod ascii_char; #[unstable(feature = "ascii_char", issue = "110998")] @@ -24,7 +25,7 @@ pub use ascii_char::AsciiChar as Char; #[must_use = "iterators are lazy and do nothing unless consumed"] #[stable(feature = "rust1", since = "1.0.0")] #[derive(Clone)] -pub struct EscapeDefault(escape::EscapeIterInner<4>); +pub struct EscapeDefault(EscapeIterInner<4, AlwaysEscaped>); /// Returns an iterator that produces an escaped version of a `u8`. /// @@ -96,17 +97,12 @@ pub fn escape_default(c: u8) -> EscapeDefault { impl EscapeDefault { #[inline] pub(crate) const fn new(c: u8) -> Self { - Self(escape::EscapeIterInner::ascii(c)) + Self(EscapeIterInner::ascii(c)) } #[inline] pub(crate) fn empty() -> Self { - Self(escape::EscapeIterInner::empty()) - } - - #[inline] - pub(crate) fn as_str(&self) -> &str { - self.0.as_str() + Self(EscapeIterInner::empty()) } } @@ -168,7 +164,7 @@ impl FusedIterator for EscapeDefault {} #[stable(feature = "ascii_escape_display", since = "1.39.0")] impl fmt::Display for EscapeDefault { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.0.as_str()) + fmt::Display::fmt(&self.0, f) } } diff --git a/library/core/src/char/mod.rs b/library/core/src/char/mod.rs index 5b9f0e2143f5d..82a3f6f916be3 100644 --- a/library/core/src/char/mod.rs +++ b/library/core/src/char/mod.rs @@ -44,7 +44,7 @@ pub use self::methods::{encode_utf8_raw, encode_utf8_raw_unchecked}; // perma-un use crate::ascii; pub(crate) use self::methods::EscapeDebugExtArgs; use crate::error::Error; -use crate::escape; +use crate::escape::{AlwaysEscaped, EscapeIterInner, MaybeEscaped}; use crate::fmt::{self, Write}; use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce}; use crate::num::NonZero; @@ -161,12 +161,12 @@ pub const fn from_digit(num: u32, radix: u32) -> Option { /// [`escape_unicode`]: char::escape_unicode #[derive(Clone, Debug)] #[stable(feature = "rust1", since = "1.0.0")] -pub struct EscapeUnicode(escape::EscapeIterInner<10>); +pub struct EscapeUnicode(EscapeIterInner<10, AlwaysEscaped>); impl EscapeUnicode { #[inline] const fn new(c: char) -> Self { - Self(escape::EscapeIterInner::unicode(c)) + Self(EscapeIterInner::unicode(c)) } } @@ -215,7 +215,7 @@ impl FusedIterator for EscapeUnicode {} #[stable(feature = "char_struct_display", since = "1.16.0")] impl fmt::Display for EscapeUnicode { fn 
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.0.as_str()) + fmt::Display::fmt(&self.0, f) } } @@ -227,22 +227,22 @@ impl fmt::Display for EscapeUnicode { /// [`escape_default`]: char::escape_default #[derive(Clone, Debug)] #[stable(feature = "rust1", since = "1.0.0")] -pub struct EscapeDefault(escape::EscapeIterInner<10>); +pub struct EscapeDefault(EscapeIterInner<10, AlwaysEscaped>); impl EscapeDefault { #[inline] const fn printable(c: ascii::Char) -> Self { - Self(escape::EscapeIterInner::ascii(c.to_u8())) + Self(EscapeIterInner::ascii(c.to_u8())) } #[inline] const fn backslash(c: ascii::Char) -> Self { - Self(escape::EscapeIterInner::backslash(c)) + Self(EscapeIterInner::backslash(c)) } #[inline] const fn unicode(c: char) -> Self { - Self(escape::EscapeIterInner::unicode(c)) + Self(EscapeIterInner::unicode(c)) } } @@ -290,8 +290,9 @@ impl FusedIterator for EscapeDefault {} #[stable(feature = "char_struct_display", since = "1.16.0")] impl fmt::Display for EscapeDefault { + #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.0.as_str()) + fmt::Display::fmt(&self.0, f) } } @@ -303,37 +304,22 @@ impl fmt::Display for EscapeDefault { /// [`escape_debug`]: char::escape_debug #[stable(feature = "char_escape_debug", since = "1.20.0")] #[derive(Clone, Debug)] -pub struct EscapeDebug(EscapeDebugInner); - -#[derive(Clone, Debug)] -// Note: It’s possible to manually encode the EscapeDebugInner inside of -// EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds -// a char) which would likely result in a more optimised code. For now we use -// the option easier to implement. -enum EscapeDebugInner { - Bytes(escape::EscapeIterInner<10>), - Char(char), -} +pub struct EscapeDebug(EscapeIterInner<10, MaybeEscaped>); impl EscapeDebug { #[inline] const fn printable(chr: char) -> Self { - Self(EscapeDebugInner::Char(chr)) + Self(EscapeIterInner::printable(chr)) } #[inline] const fn backslash(c: ascii::Char) -> Self { - Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::backslash(c))) + Self(EscapeIterInner::backslash(c)) } #[inline] const fn unicode(c: char) -> Self { - Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::unicode(c))) - } - - #[inline] - fn clear(&mut self) { - self.0 = EscapeDebugInner::Bytes(escape::EscapeIterInner::empty()); + Self(EscapeIterInner::unicode(c)) } } @@ -343,13 +329,7 @@ impl Iterator for EscapeDebug { #[inline] fn next(&mut self) -> Option { - match self.0 { - EscapeDebugInner::Bytes(ref mut bytes) => bytes.next().map(char::from), - EscapeDebugInner::Char(chr) => { - self.clear(); - Some(chr) - } - } + self.0.next() } #[inline] @@ -367,10 +347,7 @@ impl Iterator for EscapeDebug { #[stable(feature = "char_escape_debug", since = "1.20.0")] impl ExactSizeIterator for EscapeDebug { fn len(&self) -> usize { - match &self.0 { - EscapeDebugInner::Bytes(bytes) => bytes.len(), - EscapeDebugInner::Char(_) => 1, - } + self.0.len() } } @@ -379,11 +356,9 @@ impl FusedIterator for EscapeDebug {} #[stable(feature = "char_escape_debug", since = "1.20.0")] impl fmt::Display for EscapeDebug { + #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self.0 { - EscapeDebugInner::Bytes(bytes) => f.write_str(bytes.as_str()), - EscapeDebugInner::Char(chr) => f.write_char(*chr), - } + fmt::Display::fmt(&self.0, f) } } @@ -480,6 +455,7 @@ macro_rules! 
casemappingiter_impls { #[stable(feature = "char_struct_display", since = "1.16.0")] impl fmt::Display for $ITER_NAME { + #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&self.0, f) } diff --git a/library/core/src/escape.rs b/library/core/src/escape.rs index 0c3329f676eeb..f459c58270818 100644 --- a/library/core/src/escape.rs +++ b/library/core/src/escape.rs @@ -1,11 +1,16 @@ //! Helper code for character escaping. use crate::ascii; +use crate::fmt::{self, Write}; +use crate::marker::PhantomData; use crate::num::NonZero; use crate::ops::Range; const HEX_DIGITS: [ascii::Char; 16] = *b"0123456789abcdef".as_ascii().unwrap(); +/// Escapes a character with `\x` representation. +/// +/// Returns a buffer with the escaped representation and its corresponding range. #[inline] const fn backslash(a: ascii::Char) -> ([ascii::Char; N], Range) { const { assert!(N >= 2) }; @@ -18,6 +23,9 @@ const fn backslash(a: ascii::Char) -> ([ascii::Char; N], Range(byte: u8) -> ([ascii::Char; N], Range) { const { assert!(N >= 4) }; @@ -35,6 +43,7 @@ const fn hex_escape(byte: u8) -> ([ascii::Char; N], Range) { (output, 0..4) } +/// Returns a buffer with the verbatim character and its corresponding range. #[inline] const fn verbatim(a: ascii::Char) -> ([ascii::Char; N], Range) { const { assert!(N >= 1) }; @@ -48,7 +57,7 @@ const fn verbatim(a: ascii::Char) -> ([ascii::Char; N], Range(byte: u8) -> ([ascii::Char; N], Range) { const { assert!(N >= 4) }; @@ -122,9 +131,9 @@ const fn escape_ascii(byte: u8) -> ([ascii::Char; N], Range) } } -/// Escapes a character `\u{NNNN}` representation. +/// Escapes a character with `\u{NNNN}` representation. /// -/// Returns a buffer and the length of the escaped representation. +/// Returns a buffer with the escaped representation and its corresponding range. const fn escape_unicode(c: char) -> ([ascii::Char; N], Range) { const { assert!(N >= 10 && N < u8::MAX as usize) }; @@ -149,77 +158,214 @@ const fn escape_unicode(c: char) -> ([ascii::Char; N], Range (output, (start as u8)..(N as u8)) } -/// An iterator over an fixed-size array. -/// -/// This is essentially equivalent to array’s IntoIter except that indexes are -/// limited to u8 to reduce size of the structure. -#[derive(Clone, Debug)] -pub(crate) struct EscapeIterInner { - // The element type ensures this is always ASCII, and thus also valid UTF-8. - data: [ascii::Char; N], - - // Invariant: `alive.start <= alive.end <= N` +#[derive(Clone, Copy)] +union MaybeEscapedCharacter { + pub escape_seq: [ascii::Char; N], + pub literal: char, +} + +/// Marker type to indicate that the character is always escaped, +/// used to optimize the iterator implementation. +#[derive(Clone, Copy)] +#[non_exhaustive] +pub(crate) struct AlwaysEscaped; + +/// Marker type to indicate that the character may be escaped, +/// used to optimize the iterator implementation. +#[derive(Clone, Copy)] +#[non_exhaustive] +pub(crate) struct MaybeEscaped; + +/// An iterator over a possibly escaped character. +#[derive(Clone)] +pub(crate) struct EscapeIterInner { + // Invariant: + // + // If `alive.end <= Self::LITERAL_ESCAPE_START`, `data` must contain + // printable ASCII characters in the `alive` range of its `escape_seq` variant. + // + // If `alive.end > Self::LITERAL_ESCAPE_START`, `data` must contain a + // `char` in its `literal` variant, and the `alive` range must have a + // length of at most `1`. 
+ data: MaybeEscapedCharacter, alive: Range, + escaping: PhantomData, } -impl EscapeIterInner { +impl EscapeIterInner { + const LITERAL_ESCAPE_START: u8 = 128; + + /// # Safety + /// + /// `data.escape_seq` must contain an escape sequence in the range given by `alive`. + #[inline] + const unsafe fn new(data: MaybeEscapedCharacter, alive: Range) -> Self { + // Longer escape sequences are not useful given `alive.end` is at most + // `Self::LITERAL_ESCAPE_START`. + const { assert!(N < Self::LITERAL_ESCAPE_START as usize) }; + + // Check bounds, which implicitly also checks the invariant + // `alive.end <= Self::LITERAL_ESCAPE_START`. + debug_assert!(alive.end <= (N + 1) as u8); + + Self { data, alive, escaping: PhantomData } + } + pub(crate) const fn backslash(c: ascii::Char) -> Self { - let (data, range) = backslash(c); - Self { data, alive: range } + let (escape_seq, alive) = backslash(c); + // SAFETY: `escape_seq` contains an escape sequence in the range given by `alive`. + unsafe { Self::new(MaybeEscapedCharacter { escape_seq }, alive) } } pub(crate) const fn ascii(c: u8) -> Self { - let (data, range) = escape_ascii(c); - Self { data, alive: range } + let (escape_seq, alive) = escape_ascii(c); + // SAFETY: `escape_seq` contains an escape sequence in the range given by `alive`. + unsafe { Self::new(MaybeEscapedCharacter { escape_seq }, alive) } } pub(crate) const fn unicode(c: char) -> Self { - let (data, range) = escape_unicode(c); - Self { data, alive: range } + let (escape_seq, alive) = escape_unicode(c); + // SAFETY: `escape_seq` contains an escape sequence in the range given by `alive`. + unsafe { Self::new(MaybeEscapedCharacter { escape_seq }, alive) } } #[inline] pub(crate) const fn empty() -> Self { - Self { data: [ascii::Char::Null; N], alive: 0..0 } + // SAFETY: `0..0` ensures an empty escape sequence. + unsafe { Self::new(MaybeEscapedCharacter { escape_seq: [ascii::Char::Null; N] }, 0..0) } } #[inline] - pub(crate) fn as_ascii(&self) -> &[ascii::Char] { - // SAFETY: `self.alive` is guaranteed to be a valid range for indexing `self.data`. - unsafe { - self.data.get_unchecked(usize::from(self.alive.start)..usize::from(self.alive.end)) - } + pub(crate) fn len(&self) -> usize { + usize::from(self.alive.end - self.alive.start) } #[inline] - pub(crate) fn as_str(&self) -> &str { - self.as_ascii().as_str() + pub(crate) fn advance_by(&mut self, n: usize) -> Result<(), NonZero> { + self.alive.advance_by(n) } #[inline] - pub(crate) fn len(&self) -> usize { - usize::from(self.alive.end - self.alive.start) + pub(crate) fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero> { + self.alive.advance_back_by(n) + } + + /// Returns a `char` if `self.data` contains one in its `literal` variant. + #[inline] + const fn to_char(&self) -> Option { + if self.alive.end > Self::LITERAL_ESCAPE_START { + // SAFETY: We just checked that `self.data` contains a `char` in + // its `literal` variant. + return Some(unsafe { self.data.literal }); + } + + None } + /// Returns the printable ASCII characters in the `escape_seq` variant of `self.data` + /// as a string. + /// + /// # Safety + /// + /// - `self.data` must contain printable ASCII characters in its `escape_seq` variant. + /// - `self.alive` must be a valid range for `self.data.escape_seq`. 
+ #[inline] + unsafe fn to_str_unchecked(&self) -> &str { + debug_assert!(self.alive.end <= Self::LITERAL_ESCAPE_START); + + // SAFETY: The caller guarantees `self.data` contains printable ASCII + // characters in its `escape_seq` variant, and `self.alive` is + // a valid range for `self.data.escape_seq`. + unsafe { + self.data + .escape_seq + .get_unchecked(usize::from(self.alive.start)..usize::from(self.alive.end)) + .as_str() + } + } +} + +impl EscapeIterInner { pub(crate) fn next(&mut self) -> Option { let i = self.alive.next()?; - // SAFETY: `i` is guaranteed to be a valid index for `self.data`. - unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) } + // SAFETY: The `AlwaysEscaped` marker guarantees that `self.data` + // contains printable ASCII characters in its `escape_seq` + // variant, and `i` is guaranteed to be a valid index for + // `self.data.escape_seq`. + unsafe { Some(self.data.escape_seq.get_unchecked(usize::from(i)).to_u8()) } } pub(crate) fn next_back(&mut self) -> Option { let i = self.alive.next_back()?; - // SAFETY: `i` is guaranteed to be a valid index for `self.data`. - unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) } + // SAFETY: The `AlwaysEscaped` marker guarantees that `self.data` + // contains printable ASCII characters in its `escape_seq` + // variant, and `i` is guaranteed to be a valid index for + // `self.data.escape_seq`. + unsafe { Some(self.data.escape_seq.get_unchecked(usize::from(i)).to_u8()) } } +} - pub(crate) fn advance_by(&mut self, n: usize) -> Result<(), NonZero> { - self.alive.advance_by(n) +impl EscapeIterInner { + // This is the only way to create any `EscapeIterInner` containing a `char` in + // the `literal` variant of its `self.data`, meaning the `AlwaysEscaped` marker + // guarantees that `self.data` contains printable ASCII characters in its + // `escape_seq` variant. + pub(crate) const fn printable(c: char) -> Self { + Self { + data: MaybeEscapedCharacter { literal: c }, + // Uphold the invariant `alive.end > Self::LITERAL_ESCAPE_START`, and ensure + // `len` behaves correctly for iterating through one character literal. + alive: Self::LITERAL_ESCAPE_START..(Self::LITERAL_ESCAPE_START + 1), + escaping: PhantomData, + } } - pub(crate) fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero> { - self.alive.advance_back_by(n) + pub(crate) fn next(&mut self) -> Option { + let i = self.alive.next()?; + + if let Some(c) = self.to_char() { + return Some(c); + } + + // SAFETY: At this point, `self.data` must contain printable ASCII + // characters in its `escape_seq` variant, and `i` is + // guaranteed to be a valid index for `self.data.escape_seq`. + Some(char::from(unsafe { self.data.escape_seq.get_unchecked(usize::from(i)).to_u8() })) + } +} + +impl fmt::Display for EscapeIterInner { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // SAFETY: The `AlwaysEscaped` marker guarantees that `self.data` + // contains printable ASCII chars, and `self.alive` is + // guaranteed to be a valid range for `self.data`. + f.write_str(unsafe { self.to_str_unchecked() }) + } +} + +impl fmt::Display for EscapeIterInner { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(c) = self.to_char() { + return f.write_char(c); + } + + // SAFETY: At this point, `self.data` must contain printable ASCII + // characters in its `escape_seq` variant, and `self.alive` + // is guaranteed to be a valid range for `self.data`. 
+ f.write_str(unsafe { self.to_str_unchecked() }) + } +} + +impl fmt::Debug for EscapeIterInner { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("EscapeIterInner").field(&format_args!("'{}'", self)).finish() + } +} + +impl fmt::Debug for EscapeIterInner { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("EscapeIterInner").field(&format_args!("'{}'", self)).finish() } } diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index 88855831788db..6231c3425c880 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -153,7 +153,6 @@ #![feature(f16)] #![feature(freeze_impls)] #![feature(fundamental)] -#![feature(generic_arg_infer)] #![feature(if_let_guard)] #![feature(intra_doc_pointers)] #![feature(intrinsics)] diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs index b4d9a1b1ca4fd..181ae82959c66 100644 --- a/library/core/src/slice/ascii.rs +++ b/library/core/src/slice/ascii.rs @@ -308,7 +308,7 @@ impl<'a> fmt::Display for EscapeAscii<'a> { if let Some(&b) = bytes.first() { // guaranteed to be non-empty, better to write it as a str - f.write_str(ascii::escape_default(b).as_str())?; + fmt::Display::fmt(&ascii::escape_default(b), f)?; bytes = &bytes[1..]; } } diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 0469db0814c1d..07f212b113564 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -1316,8 +1316,17 @@ impl PathBuf { need_sep = false } + let need_clear = if cfg!(target_os = "cygwin") { + // If path is absolute and its prefix is none, it is like `/foo`, + // and will be handled below. + path.prefix().is_some() + } else { + // On Unix: prefix is always None. + path.is_absolute() || path.prefix().is_some() + }; + // absolute `path` replaces `self` - if path.is_absolute() || path.prefix().is_some() { + if need_clear { self.inner.truncate(0); // verbatim paths need . and .. removed @@ -3643,6 +3652,11 @@ impl Error for NormalizeError {} /// paths, this is currently equivalent to calling /// [`GetFullPathNameW`][windows-path]. /// +/// On Cygwin, this is currently equivalent to calling [`cygwin_conv_path`][cygwin-path] +/// with mode `CCP_WIN_A_TO_POSIX`, and then being processed like other POSIX platforms. +/// If a Windows path is given, it will be converted to an absolute POSIX path without +/// keeping `..`. +/// /// Note that these [may change in the future][changes]. 
 ///
 /// # Errors
@@ -3700,6 +3714,7 @@ impl Error for NormalizeError {}
 /// [changes]: io#platform-specific-behavior
 /// [posix-semantics]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
 /// [windows-path]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
+/// [cygwin-path]: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html
 #[stable(feature = "absolute_path", since = "1.79.0")]
 pub fn absolute<P: AsRef<Path>>(path: P) -> io::Result<PathBuf> {
     let path = path.as_ref();
diff --git a/library/std/src/sys/path/cygwin.rs b/library/std/src/sys/path/cygwin.rs
new file mode 100644
index 0000000000000..e90372805bbf9
--- /dev/null
+++ b/library/std/src/sys/path/cygwin.rs
@@ -0,0 +1,92 @@
+use crate::ffi::OsString;
+use crate::os::unix::ffi::OsStringExt;
+use crate::path::{Path, PathBuf};
+use crate::sys::common::small_c_string::run_path_with_cstr;
+use crate::sys::cvt;
+use crate::{io, ptr};
+
+#[inline]
+pub fn is_sep_byte(b: u8) -> bool {
+    b == b'/' || b == b'\\'
+}
+
+/// Cygwin always prefers `/` over `\`, and it always converts all `/` to `\`
+/// internally when calling Win32 APIs. Therefore, the server component of path
+/// `\\?\UNC\localhost/share` is `localhost/share` on Win32, but `localhost`
+/// on Cygwin.
+#[inline]
+pub fn is_verbatim_sep(b: u8) -> bool {
+    b == b'/' || b == b'\\'
+}
+
+pub use super::windows_prefix::parse_prefix;
+
+pub const MAIN_SEP_STR: &str = "/";
+pub const MAIN_SEP: char = '/';
+
+unsafe extern "C" {
+    // Doc: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html
+    // Src: https://github.com/cygwin/cygwin/blob/718a15ba50e0d01c79800bd658c2477f9a603540/winsup/cygwin/path.cc#L3902
+    // Safety:
+    // * `what` should be `CCP_WIN_A_TO_POSIX` here
+    // * `from` is a null-terminated UTF-8 path
+    // * `to` is the output buffer, whose size in bytes is `size`.
+    //
+    // Converts a path to an absolute POSIX path, whether the input is a Win32 path or a POSIX path.
+    fn cygwin_conv_path(
+        what: libc::c_uint,
+        from: *const libc::c_char,
+        to: *mut u8,
+        size: libc::size_t,
+    ) -> libc::ssize_t;
+}
+
+const CCP_WIN_A_TO_POSIX: libc::c_uint = 2;
+
+/// Make a POSIX path absolute.
+pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
+    run_path_with_cstr(path, &|path| {
+        let conv = CCP_WIN_A_TO_POSIX;
+        let size = cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), ptr::null_mut(), 0) })?;
+        // On success, `size` is never 0.
+ debug_assert!(size >= 1); + let size = size as usize; + let mut buffer = Vec::with_capacity(size); + cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), buffer.as_mut_ptr(), size) })?; + unsafe { + buffer.set_len(size - 1); + } + Ok(PathBuf::from(OsString::from_vec(buffer))) + }) + .map(|path| { + if path.prefix().is_some() { + return path; + } + + // From unix.rs + let mut components = path.components(); + let path_os = path.as_os_str().as_encoded_bytes(); + + let mut normalized = if path_os.starts_with(b"//") && !path_os.starts_with(b"///") { + components.next(); + PathBuf::from("//") + } else { + PathBuf::new() + }; + normalized.extend(components); + + if path_os.ends_with(b"/") { + normalized.push(""); + } + + normalized + }) +} + +pub(crate) fn is_absolute(path: &Path) -> bool { + if path.as_os_str().as_encoded_bytes().starts_with(b"\\") { + path.has_root() && path.prefix().is_some() + } else { + path.has_root() + } +} diff --git a/library/std/src/sys/path/mod.rs b/library/std/src/sys/path/mod.rs index 1fa4e80d6780c..a4ff4338cf5f4 100644 --- a/library/std/src/sys/path/mod.rs +++ b/library/std/src/sys/path/mod.rs @@ -1,6 +1,7 @@ cfg_if::cfg_if! { if #[cfg(target_os = "windows")] { mod windows; + mod windows_prefix; pub use windows::*; } else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] { mod sgx; @@ -11,6 +12,10 @@ cfg_if::cfg_if! { } else if #[cfg(target_os = "uefi")] { mod uefi; pub use uefi::*; + } else if #[cfg(target_os = "cygwin")] { + mod cygwin; + mod windows_prefix; + pub use cygwin::*; } else { mod unix; pub use unix::*; diff --git a/library/std/src/sys/path/windows.rs b/library/std/src/sys/path/windows.rs index e0e003f6a8192..f124e1e5a71c7 100644 --- a/library/std/src/sys/path/windows.rs +++ b/library/std/src/sys/path/windows.rs @@ -1,5 +1,5 @@ use crate::ffi::{OsStr, OsString}; -use crate::path::{Path, PathBuf, Prefix}; +use crate::path::{Path, PathBuf}; use crate::sys::api::utf16; use crate::sys::pal::{c, fill_utf16_buf, os2path, to_u16s}; use crate::{io, ptr}; @@ -7,6 +7,8 @@ use crate::{io, ptr}; #[cfg(test)] mod tests; +pub use super::windows_prefix::parse_prefix; + pub const MAIN_SEP_STR: &str = "\\"; pub const MAIN_SEP: char = '\\'; @@ -77,177 +79,6 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf { path.into() } -struct PrefixParser<'a, const LEN: usize> { - path: &'a OsStr, - prefix: [u8; LEN], -} - -impl<'a, const LEN: usize> PrefixParser<'a, LEN> { - #[inline] - fn get_prefix(path: &OsStr) -> [u8; LEN] { - let mut prefix = [0; LEN]; - // SAFETY: Only ASCII characters are modified. - for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() { - prefix[i] = if ch == b'/' { b'\\' } else { ch }; - } - prefix - } - - fn new(path: &'a OsStr) -> Self { - Self { path, prefix: Self::get_prefix(path) } - } - - fn as_slice(&self) -> PrefixParserSlice<'a, '_> { - PrefixParserSlice { - path: self.path, - prefix: &self.prefix[..LEN.min(self.path.len())], - index: 0, - } - } -} - -struct PrefixParserSlice<'a, 'b> { - path: &'a OsStr, - prefix: &'b [u8], - index: usize, -} - -impl<'a> PrefixParserSlice<'a, '_> { - fn strip_prefix(&self, prefix: &str) -> Option { - self.prefix[self.index..] - .starts_with(prefix.as_bytes()) - .then_some(Self { index: self.index + prefix.len(), ..*self }) - } - - fn prefix_bytes(&self) -> &'a [u8] { - &self.path.as_encoded_bytes()[..self.index] - } - - fn finish(self) -> &'a OsStr { - // SAFETY: The unsafety here stems from converting between &OsStr and - // &[u8] and back. 
This is safe to do because (1) we only look at ASCII - // contents of the encoding and (2) new &OsStr values are produced only - // from ASCII-bounded slices of existing &OsStr values. - unsafe { OsStr::from_encoded_bytes_unchecked(&self.path.as_encoded_bytes()[self.index..]) } - } -} - -pub fn parse_prefix(path: &OsStr) -> Option> { - use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC}; - - let parser = PrefixParser::<8>::new(path); - let parser = parser.as_slice(); - if let Some(parser) = parser.strip_prefix(r"\\") { - // \\ - - // The meaning of verbatim paths can change when they use a different - // separator. - if let Some(parser) = parser.strip_prefix(r"?\") - && !parser.prefix_bytes().iter().any(|&x| x == b'/') - { - // \\?\ - if let Some(parser) = parser.strip_prefix(r"UNC\") { - // \\?\UNC\server\share - - let path = parser.finish(); - let (server, path) = parse_next_component(path, true); - let (share, _) = parse_next_component(path, true); - - Some(VerbatimUNC(server, share)) - } else { - let path = parser.finish(); - - // in verbatim paths only recognize an exact drive prefix - if let Some(drive) = parse_drive_exact(path) { - // \\?\C: - Some(VerbatimDisk(drive)) - } else { - // \\?\prefix - let (prefix, _) = parse_next_component(path, true); - Some(Verbatim(prefix)) - } - } - } else if let Some(parser) = parser.strip_prefix(r".\") { - // \\.\COM42 - let path = parser.finish(); - let (prefix, _) = parse_next_component(path, false); - Some(DeviceNS(prefix)) - } else { - let path = parser.finish(); - let (server, path) = parse_next_component(path, false); - let (share, _) = parse_next_component(path, false); - - if !server.is_empty() && !share.is_empty() { - // \\server\share - Some(UNC(server, share)) - } else { - // no valid prefix beginning with "\\" recognized - None - } - } - } else { - // If it has a drive like `C:` then it's a disk. - // Otherwise there is no prefix. - parse_drive(path).map(Disk) - } -} - -// Parses a drive prefix, e.g. "C:" and "C:\whatever" -fn parse_drive(path: &OsStr) -> Option { - // In most DOS systems, it is not possible to have more than 26 drive letters. - // See . - fn is_valid_drive_letter(drive: &u8) -> bool { - drive.is_ascii_alphabetic() - } - - match path.as_encoded_bytes() { - [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), - _ => None, - } -} - -// Parses a drive prefix exactly, e.g. "C:" -fn parse_drive_exact(path: &OsStr) -> Option { - // only parse two bytes: the drive letter and the drive separator - if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { - parse_drive(path) - } else { - None - } -} - -// Parse the next path component. -// -// Returns the next component and the rest of the path excluding the component and separator. -// Does not recognize `/` as a separator character if `verbatim` is true. -fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { - let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; - - match path.as_encoded_bytes().iter().position(|&x| separator(x)) { - Some(separator_start) => { - let separator_end = separator_start + 1; - - let component = &path.as_encoded_bytes()[..separator_start]; - - // Panic safe - // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. 
- let path = &path.as_encoded_bytes()[separator_end..]; - - // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') - // is encoded in a single byte, therefore `bytes[separator_start]` and - // `bytes[separator_end]` must be code point boundaries and thus - // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. - unsafe { - ( - OsStr::from_encoded_bytes_unchecked(component), - OsStr::from_encoded_bytes_unchecked(path), - ) - } - } - None => (path, OsStr::new("")), - } -} - /// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits. /// /// This path may or may not have a verbatim prefix. diff --git a/library/std/src/sys/path/windows/tests.rs b/library/std/src/sys/path/windows/tests.rs index 9eb79203dcac7..830f48d7bfc94 100644 --- a/library/std/src/sys/path/windows/tests.rs +++ b/library/std/src/sys/path/windows/tests.rs @@ -1,4 +1,6 @@ +use super::super::windows_prefix::*; use super::*; +use crate::path::Prefix; #[test] fn test_parse_next_component() { diff --git a/library/std/src/sys/path/windows_prefix.rs b/library/std/src/sys/path/windows_prefix.rs new file mode 100644 index 0000000000000..b9dfe754485ab --- /dev/null +++ b/library/std/src/sys/path/windows_prefix.rs @@ -0,0 +1,182 @@ +//! Parse Windows prefixes, for both Windows and Cygwin. + +use super::{is_sep_byte, is_verbatim_sep}; +use crate::ffi::OsStr; +use crate::path::Prefix; + +struct PrefixParser<'a, const LEN: usize> { + path: &'a OsStr, + prefix: [u8; LEN], +} + +impl<'a, const LEN: usize> PrefixParser<'a, LEN> { + #[inline] + fn get_prefix(path: &OsStr) -> [u8; LEN] { + let mut prefix = [0; LEN]; + // SAFETY: Only ASCII characters are modified. + for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() { + prefix[i] = if ch == b'/' { b'\\' } else { ch }; + } + prefix + } + + fn new(path: &'a OsStr) -> Self { + Self { path, prefix: Self::get_prefix(path) } + } + + fn as_slice(&self) -> PrefixParserSlice<'a, '_> { + PrefixParserSlice { + path: self.path, + prefix: &self.prefix[..LEN.min(self.path.len())], + index: 0, + } + } +} + +struct PrefixParserSlice<'a, 'b> { + path: &'a OsStr, + prefix: &'b [u8], + index: usize, +} + +impl<'a> PrefixParserSlice<'a, '_> { + fn strip_prefix(&self, prefix: &str) -> Option { + self.prefix[self.index..] + .starts_with(prefix.as_bytes()) + .then_some(Self { index: self.index + prefix.len(), ..*self }) + } + + fn prefix_bytes(&self) -> &'a [u8] { + &self.path.as_encoded_bytes()[..self.index] + } + + fn finish(self) -> &'a OsStr { + // SAFETY: The unsafety here stems from converting between &OsStr and + // &[u8] and back. This is safe to do because (1) we only look at ASCII + // contents of the encoding and (2) new &OsStr values are produced only + // from ASCII-bounded slices of existing &OsStr values. + unsafe { OsStr::from_encoded_bytes_unchecked(&self.path.as_encoded_bytes()[self.index..]) } + } +} + +pub fn parse_prefix(path: &OsStr) -> Option> { + use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC}; + + let parser = PrefixParser::<8>::new(path); + let parser = parser.as_slice(); + if let Some(parser) = parser.strip_prefix(r"\\") { + // \\ + + // It's a POSIX path. + if cfg!(target_os = "cygwin") && !path.as_encoded_bytes().iter().any(|&x| x == b'\\') { + return None; + } + + // The meaning of verbatim paths can change when they use a different + // separator. + if let Some(parser) = parser.strip_prefix(r"?\") + // Cygwin allows `/` in verbatim paths. 
+ && (cfg!(target_os = "cygwin") || !parser.prefix_bytes().iter().any(|&x| x == b'/')) + { + // \\?\ + if let Some(parser) = parser.strip_prefix(r"UNC\") { + // \\?\UNC\server\share + + let path = parser.finish(); + let (server, path) = parse_next_component(path, true); + let (share, _) = parse_next_component(path, true); + + Some(VerbatimUNC(server, share)) + } else { + let path = parser.finish(); + + // in verbatim paths only recognize an exact drive prefix + if let Some(drive) = parse_drive_exact(path) { + // \\?\C: + Some(VerbatimDisk(drive)) + } else { + // \\?\prefix + let (prefix, _) = parse_next_component(path, true); + Some(Verbatim(prefix)) + } + } + } else if let Some(parser) = parser.strip_prefix(r".\") { + // \\.\COM42 + let path = parser.finish(); + let (prefix, _) = parse_next_component(path, false); + Some(DeviceNS(prefix)) + } else { + let path = parser.finish(); + let (server, path) = parse_next_component(path, false); + let (share, _) = parse_next_component(path, false); + + if !server.is_empty() && !share.is_empty() { + // \\server\share + Some(UNC(server, share)) + } else { + // no valid prefix beginning with "\\" recognized + None + } + } + } else { + // If it has a drive like `C:` then it's a disk. + // Otherwise there is no prefix. + Some(Disk(parse_drive(path)?)) + } +} + +// Parses a drive prefix, e.g. "C:" and "C:\whatever" +fn parse_drive(path: &OsStr) -> Option { + // In most DOS systems, it is not possible to have more than 26 drive letters. + // See . + fn is_valid_drive_letter(drive: &u8) -> bool { + drive.is_ascii_alphabetic() + } + + match path.as_encoded_bytes() { + [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), + _ => None, + } +} + +// Parses a drive prefix exactly, e.g. "C:" +fn parse_drive_exact(path: &OsStr) -> Option { + // only parse two bytes: the drive letter and the drive separator + if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { + parse_drive(path) + } else { + None + } +} + +// Parse the next path component. +// +// Returns the next component and the rest of the path excluding the component and separator. +// Does not recognize `/` as a separator character on Windows if `verbatim` is true. +pub(crate) fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { + let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; + + match path.as_encoded_bytes().iter().position(|&x| separator(x)) { + Some(separator_start) => { + let separator_end = separator_start + 1; + + let component = &path.as_encoded_bytes()[..separator_start]; + + // Panic safe + // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. + let path = &path.as_encoded_bytes()[separator_end..]; + + // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') + // is encoded in a single byte, therefore `bytes[separator_start]` and + // `bytes[separator_end]` must be code point boundaries and thus + // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. 
+ unsafe { + ( + OsStr::from_encoded_bytes_unchecked(component), + OsStr::from_encoded_bytes_unchecked(path), + ) + } + } + None => (path, OsStr::new("")), + } +} diff --git a/library/std/tests/path.rs b/library/std/tests/path.rs index be0dda1d426f3..901d2770f203e 100644 --- a/library/std/tests/path.rs +++ b/library/std/tests/path.rs @@ -1112,6 +1112,473 @@ pub fn test_decompositions_windows() { ); } +// Unix paths are tested in `test_decompositions_unix` above. +#[test] +#[cfg(target_os = "cygwin")] +pub fn test_decompositions_cygwin() { + t!("\\", + iter: ["/"], + has_root: true, + is_absolute: false, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("c:", + iter: ["c:"], + has_root: false, + is_absolute: false, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("c:\\", + iter: ["c:", "/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("c:/", + iter: ["c:", "/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("a\\b\\c", + iter: ["a", "b", "c"], + has_root: false, + is_absolute: false, + parent: Some("a\\b"), + file_name: Some("c"), + file_stem: Some("c"), + extension: None, + file_prefix: Some("c") + ); + + t!("\\a", + iter: ["/", "a"], + has_root: true, + is_absolute: false, + parent: Some("\\"), + file_name: Some("a"), + file_stem: Some("a"), + extension: None, + file_prefix: Some("a") + ); + + t!("c:\\foo.txt", + iter: ["c:", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("c:\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\server\\share\\foo.txt", + iter: ["\\\\server\\share", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\server\\share\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("//server/share\\foo.txt", + iter: ["//server/share", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("//server/share\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("//server/share/foo.txt", + iter: ["/", "server", "share", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("//server/share"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\server\\share", + iter: ["\\\\server\\share", "/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\server", + iter: ["/", "server"], + has_root: true, + is_absolute: false, + parent: Some("\\"), + file_name: Some("server"), + file_stem: Some("server"), + extension: None, + file_prefix: Some("server") + ); + + t!("\\\\?\\bar\\foo.txt", + iter: ["\\\\?\\bar", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\bar\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\bar", + iter: ["\\\\?\\bar"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\", + iter: 
["\\\\?\\"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\UNC\\server\\share\\foo.txt", + iter: ["\\\\?\\UNC\\server\\share", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\UNC\\server\\share\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\UNC\\server/share\\foo.txt", + iter: ["\\\\?\\UNC\\server/share", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\UNC\\server/share\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("//?/UNC/server\\share/foo.txt", + iter: ["//?/UNC/server\\share", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("//?/UNC/server\\share/"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("//?/UNC/server/share/foo.txt", + iter: ["/", "?", "UNC", "server", "share", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("//?/UNC/server/share"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\UNC\\server", + iter: ["\\\\?\\UNC\\server"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\UNC\\", + iter: ["\\\\?\\UNC\\"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\C:\\foo.txt", + iter: ["\\\\?\\C:", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("//?/C:\\foo.txt", + iter: ["//?/C:", "/", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("//?/C:\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("//?/C:/foo.txt", + iter: ["/", "?", "C:", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("//?/C:"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\C:\\", + iter: ["\\\\?\\C:", "/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\C:", + iter: ["\\\\?\\C:"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\foo/bar", + iter: ["\\\\?\\foo", "/", "bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\foo/"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\?\\C:/foo/bar", + iter: ["\\\\?\\C:", "/", "foo", "bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:/foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\.\\foo\\bar", + iter: ["\\\\.\\foo", "/", "bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\.\\foo\\"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\.\\foo", + iter: 
["\\\\.\\foo", "/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\.\\foo/bar", + iter: ["\\\\.\\foo", "/", "bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\.\\foo/"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\.\\foo\\bar/baz", + iter: ["\\\\.\\foo", "/", "bar", "baz"], + has_root: true, + is_absolute: true, + parent: Some("\\\\.\\foo\\bar"), + file_name: Some("baz"), + file_stem: Some("baz"), + extension: None, + file_prefix: Some("baz") + ); + + t!("\\\\.\\", + iter: ["\\\\.\\", "/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("//.\\foo/bar", + iter: ["//.\\foo", "/", "bar"], + has_root: true, + is_absolute: true, + parent: Some("//.\\foo/"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\./foo/bar", + iter: ["\\\\./foo", "/", "bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\./foo/"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("//./foo\\bar", + iter: ["//./foo", "/", "bar"], + has_root: true, + is_absolute: true, + parent: Some("//./foo\\"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("//./?/C:/foo/bar", + iter: ["/", "?", "C:", "foo", "bar"], + has_root: true, + is_absolute: true, + parent: Some("//./?/C:/foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("//././../././../?/C:/foo/bar", + iter: ["/", "..", "..", "?", "C:", "foo", "bar"], + has_root: true, + is_absolute: true, + parent: Some("//././../././../?/C:/foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\?\\a\\b\\", + iter: ["\\\\?\\a", "/", "b"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\a\\"), + file_name: Some("b"), + file_stem: Some("b"), + extension: None, + file_prefix: Some("b") + ); + + t!("\\\\?\\C:\\foo.txt.zip", + iter: ["\\\\?\\C:", "/", "foo.txt.zip"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:\\"), + file_name: Some("foo.txt.zip"), + file_stem: Some("foo.txt"), + extension: Some("zip"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\C:\\.foo.txt.zip", + iter: ["\\\\?\\C:", "/", ".foo.txt.zip"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:\\"), + file_name: Some(".foo.txt.zip"), + file_stem: Some(".foo.txt"), + extension: Some("zip"), + file_prefix: Some(".foo") + ); + + t!("\\\\?\\C:\\.foo", + iter: ["\\\\?\\C:", "/", ".foo"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:\\"), + file_name: Some(".foo"), + file_stem: Some(".foo"), + extension: None, + file_prefix: Some(".foo") + ); +} + #[test] pub fn test_stem_ext() { t!("foo", @@ -1227,6 +1694,11 @@ pub fn test_push() { tp!("/foo/bar", "/", "/"); tp!("/foo/bar", "/baz", "/baz"); tp!("/foo/bar", "./baz", "/foo/bar/./baz"); + + if cfg!(target_os = "cygwin") { + tp!("c:\\", "windows", "c:\\windows"); + tp!("c:", "windows", "c:windows"); + } } else { tp!("", "foo", "foo"); tp!("foo", "bar", r"foo\bar"); diff --git a/src/bootstrap/src/utils/channel.rs b/src/bootstrap/src/utils/channel.rs index 38f250af42f08..b28ab57377408 100644 --- 
a/src/bootstrap/src/utils/channel.rs +++ b/src/bootstrap/src/utils/channel.rs @@ -11,7 +11,7 @@ use std::path::Path; use super::execution_context::ExecutionContext; use super::helpers; use crate::Build; -use crate::utils::helpers::{start_process, t}; +use crate::utils::helpers::t; #[derive(Clone, Default)] pub enum GitInfo { @@ -46,7 +46,7 @@ impl GitInfo { let mut git_command = helpers::git(Some(dir)); git_command.arg("rev-parse"); - let output = git_command.allow_failure().run_capture(exec_ctx); + let output = git_command.allow_failure().run_capture(&exec_ctx); if output.is_failure() { return GitInfo::Absent; @@ -59,23 +59,32 @@ impl GitInfo { } // Ok, let's scrape some info - let ver_date = start_process( - helpers::git(Some(dir)) - .arg("log") - .arg("-1") - .arg("--date=short") - .arg("--pretty=format:%cd") - .as_command_mut(), - ); + // We use the command's spawn API to execute these commands concurrently, which leads to performance improvements. + let mut git_log_cmd = helpers::git(Some(dir)); + let ver_date = git_log_cmd + .arg("log") + .arg("-1") + .arg("--date=short") + .arg("--pretty=format:%cd") + .run_always() + .start_capture_stdout(&exec_ctx); + + let mut git_hash_cmd = helpers::git(Some(dir)); let ver_hash = - start_process(helpers::git(Some(dir)).arg("rev-parse").arg("HEAD").as_command_mut()); - let short_ver_hash = start_process( - helpers::git(Some(dir)).arg("rev-parse").arg("--short=9").arg("HEAD").as_command_mut(), - ); + git_hash_cmd.arg("rev-parse").arg("HEAD").run_always().start_capture_stdout(&exec_ctx); + + let mut git_short_hash_cmd = helpers::git(Some(dir)); + let short_ver_hash = git_short_hash_cmd + .arg("rev-parse") + .arg("--short=9") + .arg("HEAD") + .run_always() + .start_capture_stdout(&exec_ctx); + GitInfo::Present(Some(Info { - commit_date: ver_date().trim().to_string(), - sha: ver_hash().trim().to_string(), - short_sha: short_ver_hash().trim().to_string(), + commit_date: ver_date.wait_for_output(&exec_ctx).stdout().trim().to_string(), + sha: ver_hash.wait_for_output(&exec_ctx).stdout().trim().to_string(), + short_sha: short_ver_hash.wait_for_output(&exec_ctx).stdout().trim().to_string(), })) } diff --git a/src/bootstrap/src/utils/exec.rs b/src/bootstrap/src/utils/exec.rs index f297300e34a85..eb9802bf2e1ba 100644 --- a/src/bootstrap/src/utils/exec.rs +++ b/src/bootstrap/src/utils/exec.rs @@ -2,7 +2,6 @@ //! //! This module provides a structured way to execute and manage commands efficiently, //! ensuring controlled failure handling and output management. - use std::ffi::OsStr; use std::fmt::{Debug, Formatter}; use std::path::Path; @@ -11,7 +10,7 @@ use std::process::{Command, CommandArgs, CommandEnvs, ExitStatus, Output, Stdio} use build_helper::ci::CiEnv; use build_helper::drop_bomb::DropBomb; -use super::execution_context::ExecutionContext; +use super::execution_context::{DeferredCommand, ExecutionContext}; /// What should be done when the command fails. #[derive(Debug, Copy, Clone)] @@ -73,7 +72,7 @@ pub struct BootstrapCommand { drop_bomb: DropBomb, } -impl BootstrapCommand { +impl<'a> BootstrapCommand { #[track_caller] pub fn new>(program: S) -> Self { Command::new(program).into() @@ -158,6 +157,24 @@ impl BootstrapCommand { exec_ctx.as_ref().run(self, OutputMode::Capture, OutputMode::Print) } + /// Spawn the command in background, while capturing and returning all its output. 
+ #[track_caller] + pub fn start_capture( + &'a mut self, + exec_ctx: impl AsRef, + ) -> DeferredCommand<'a> { + exec_ctx.as_ref().start(self, OutputMode::Capture, OutputMode::Capture) + } + + /// Spawn the command in background, while capturing and returning stdout, and printing stderr. + #[track_caller] + pub fn start_capture_stdout( + &'a mut self, + exec_ctx: impl AsRef, + ) -> DeferredCommand<'a> { + exec_ctx.as_ref().start(self, OutputMode::Capture, OutputMode::Print) + } + /// Provides access to the stdlib Command inside. /// FIXME: This function should be eventually removed from bootstrap. pub fn as_command_mut(&mut self) -> &mut Command { diff --git a/src/bootstrap/src/utils/execution_context.rs b/src/bootstrap/src/utils/execution_context.rs index a5e1e9bcc07df..5b9fef3f8248b 100644 --- a/src/bootstrap/src/utils/execution_context.rs +++ b/src/bootstrap/src/utils/execution_context.rs @@ -3,6 +3,8 @@ //! This module provides the [`ExecutionContext`] type, which holds global configuration //! relevant during the execution of commands in bootstrap. This includes dry-run //! mode, verbosity level, and behavior on failure. +use std::panic::Location; +use std::process::Child; use std::sync::{Arc, Mutex}; use crate::core::config::DryRun; @@ -80,23 +82,24 @@ impl ExecutionContext { /// Note: Ideally, you should use one of the BootstrapCommand::run* functions to /// execute commands. They internally call this method. #[track_caller] - pub fn run( + pub fn start<'a>( &self, - command: &mut BootstrapCommand, + command: &'a mut BootstrapCommand, stdout: OutputMode, stderr: OutputMode, - ) -> CommandOutput { + ) -> DeferredCommand<'a> { command.mark_as_executed(); + + let created_at = command.get_created_location(); + let executed_at = std::panic::Location::caller(); + if self.dry_run() && !command.run_always { - return CommandOutput::default(); + return DeferredCommand { process: None, stdout, stderr, command, executed_at }; } #[cfg(feature = "tracing")] let _run_span = trace_cmd!(command); - let created_at = command.get_created_location(); - let executed_at = std::panic::Location::caller(); - self.verbose(|| { println!("running: {command:?} (created at {created_at}, executed at {executed_at})") }); @@ -105,92 +108,149 @@ impl ExecutionContext { cmd.stdout(stdout.stdio()); cmd.stderr(stderr.stdio()); - let output = cmd.output(); + let child = cmd.spawn(); - use std::fmt::Write; + DeferredCommand { process: Some(child), stdout, stderr, command, executed_at } + } - let mut message = String::new(); - let output: CommandOutput = match output { - // Command has succeeded - Ok(output) if output.status.success() => { - CommandOutput::from_output(output, stdout, stderr) + /// Execute a command and return its output. + /// Note: Ideally, you should use one of the BootstrapCommand::run* functions to + /// execute commands. They internally call this method. + #[track_caller] + pub fn run( + &self, + command: &mut BootstrapCommand, + stdout: OutputMode, + stderr: OutputMode, + ) -> CommandOutput { + self.start(command, stdout, stderr).wait_for_output(self) + } + + fn fail(&self, message: &str, output: CommandOutput) -> ! { + if self.is_verbose() { + println!("{message}"); + } else { + let (stdout, stderr) = (output.stdout_if_present(), output.stderr_if_present()); + // If the command captures output, the user would not see any indication that + // it has failed. In this case, print a more verbose error, since to provide more + // context. 
+ if stdout.is_some() || stderr.is_some() { + if let Some(stdout) = output.stdout_if_present().take_if(|s| !s.trim().is_empty()) { + println!("STDOUT:\n{stdout}\n"); + } + if let Some(stderr) = output.stderr_if_present().take_if(|s| !s.trim().is_empty()) { + println!("STDERR:\n{stderr}\n"); + } + println!("Command has failed. Rerun with -v to see more details."); + } else { + println!("Command has failed. Rerun with -v to see more details."); } - // Command has started, but then it failed - Ok(output) => { - writeln!( - message, - r#" -Command {command:?} did not execute successfully. + } + exit!(1); + } +} + +impl AsRef for ExecutionContext { + fn as_ref(&self) -> &ExecutionContext { + self + } +} + +pub struct DeferredCommand<'a> { + process: Option>, + command: &'a mut BootstrapCommand, + stdout: OutputMode, + stderr: OutputMode, + executed_at: &'a Location<'a>, +} + +impl<'a> DeferredCommand<'a> { + pub fn wait_for_output(mut self, exec_ctx: impl AsRef) -> CommandOutput { + let exec_ctx = exec_ctx.as_ref(); + + let process = match self.process.take() { + Some(p) => p, + None => return CommandOutput::default(), + }; + + let created_at = self.command.get_created_location(); + let executed_at = self.executed_at; + + let mut message = String::new(); + + let output = match process { + Ok(child) => match child.wait_with_output() { + Ok(result) if result.status.success() => { + // Successful execution + CommandOutput::from_output(result, self.stdout, self.stderr) + } + Ok(result) => { + // Command ran but failed + use std::fmt::Write; + + writeln!( + message, + r#" +Command {:?} did not execute successfully. Expected success, got {} Created at: {created_at} Executed at: {executed_at}"#, - output.status, - ) - .unwrap(); + self.command, result.status, + ) + .unwrap(); + + let output = CommandOutput::from_output(result, self.stdout, self.stderr); - let output: CommandOutput = CommandOutput::from_output(output, stdout, stderr); + if self.stdout.captures() { + writeln!(message, "\nSTDOUT ----\n{}", output.stdout().trim()).unwrap(); + } + if self.stderr.captures() { + writeln!(message, "\nSTDERR ----\n{}", output.stderr().trim()).unwrap(); + } - // If the output mode is OutputMode::Capture, we can now print the output. - // If it is OutputMode::Print, then the output has already been printed to - // stdout/stderr, and we thus don't have anything captured to print anyway. - if stdout.captures() { - writeln!(message, "\nSTDOUT ----\n{}", output.stdout().trim()).unwrap(); + output } - if stderr.captures() { - writeln!(message, "\nSTDERR ----\n{}", output.stderr().trim()).unwrap(); + Err(e) => { + // Failed to wait for output + use std::fmt::Write; + + writeln!( + message, + "\n\nCommand {:?} did not execute successfully.\ + \nIt was not possible to execute the command: {e:?}", + self.command + ) + .unwrap(); + + CommandOutput::did_not_start(self.stdout, self.stderr) } - output - } - // The command did not even start + }, Err(e) => { + // Failed to spawn the command + use std::fmt::Write; + writeln!( message, - "\n\nCommand {command:?} did not execute successfully.\ - \nIt was not possible to execute the command: {e:?}" + "\n\nCommand {:?} did not execute successfully.\ + \nIt was not possible to execute the command: {e:?}", + self.command ) .unwrap(); - CommandOutput::did_not_start(stdout, stderr) - } - }; - let fail = |message: &str, output: CommandOutput| -> ! 
{ - if self.is_verbose() { - println!("{message}"); - } else { - let (stdout, stderr) = (output.stdout_if_present(), output.stderr_if_present()); - // If the command captures output, the user would not see any indication that - // it has failed. In this case, print a more verbose error, since to provide more - // context. - if stdout.is_some() || stderr.is_some() { - if let Some(stdout) = - output.stdout_if_present().take_if(|s| !s.trim().is_empty()) - { - println!("STDOUT:\n{stdout}\n"); - } - if let Some(stderr) = - output.stderr_if_present().take_if(|s| !s.trim().is_empty()) - { - println!("STDERR:\n{stderr}\n"); - } - println!("Command {command:?} has failed. Rerun with -v to see more details."); - } else { - println!("Command has failed. Rerun with -v to see more details."); - } + CommandOutput::did_not_start(self.stdout, self.stderr) } - exit!(1); }; if !output.is_success() { - match command.failure_behavior { + match self.command.failure_behavior { BehaviorOnFailure::DelayFail => { - if self.fail_fast { - fail(&message, output); + if exec_ctx.fail_fast { + exec_ctx.fail(&message, output); } - - self.add_to_delay_failure(message); + exec_ctx.add_to_delay_failure(message); } BehaviorOnFailure::Exit => { - fail(&message, output); + exec_ctx.fail(&message, output); } BehaviorOnFailure::Ignore => { // If failures are allowed, either the error has been printed already @@ -199,6 +259,7 @@ Executed at: {executed_at}"#, } } } + output } } diff --git a/src/bootstrap/src/utils/helpers.rs b/src/bootstrap/src/utils/helpers.rs index f4be22f1e649e..2f18fb6031829 100644 --- a/src/bootstrap/src/utils/helpers.rs +++ b/src/bootstrap/src/utils/helpers.rs @@ -5,13 +5,11 @@ use std::ffi::OsStr; use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; use std::sync::OnceLock; use std::thread::panicking; use std::time::{Instant, SystemTime, UNIX_EPOCH}; use std::{env, fs, io, panic, str}; -use build_helper::util::fail; use object::read::archive::ArchiveFile; use crate::LldMode; @@ -282,33 +280,6 @@ pub fn make(host: &str) -> PathBuf { } } -/// Spawn a process and return a closure that will wait for the process -/// to finish and then return its output. This allows the spawned process -/// to do work without immediately blocking bootstrap. -#[track_caller] -pub fn start_process(cmd: &mut Command) -> impl FnOnce() -> String + use<> { - let child = match cmd.stderr(Stdio::inherit()).stdout(Stdio::piped()).spawn() { - Ok(child) => child, - Err(e) => fail(&format!("failed to execute command: {cmd:?}\nERROR: {e}")), - }; - - let command = format!("{cmd:?}"); - - move || { - let output = child.wait_with_output().unwrap(); - - if !output.status.success() { - panic!( - "command did not execute successfully: {}\n\ - expected success, got: {}", - command, output.status - ); - } - - String::from_utf8(output.stdout).unwrap() - } -} - /// Returns the last-modified time for `path`, or zero if it doesn't exist. 
pub fn mtime(path: &Path) -> SystemTime { fs::metadata(path).and_then(|f| f.modified()).unwrap_or(UNIX_EPOCH) diff --git a/src/ci/docker/host-x86_64/dist-aarch64-windows-gnullvm/Dockerfile b/src/ci/docker/host-x86_64/dist-aarch64-windows-gnullvm/Dockerfile new file mode 100644 index 0000000000000..cdbc1cda02558 --- /dev/null +++ b/src/ci/docker/host-x86_64/dist-aarch64-windows-gnullvm/Dockerfile @@ -0,0 +1,48 @@ +FROM ubuntu:24.04 + +WORKDIR /build + +ARG DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + cmake \ + curl \ + g++ \ + git \ + make \ + ninja-build \ + python3 \ + xz-utils + +ENV ARCH=aarch64 +COPY host-x86_64/dist-x86_64-windows-gnullvm/install-llvm-mingw.sh /build +RUN ./install-llvm-mingw.sh + +COPY scripts/sccache.sh /scripts/ +RUN sh /scripts/sccache.sh + +ENV CC_aarch64_pc_windows_gnullvm=aarch64-w64-mingw32-clang \ + CXX_aarch64_pc_windows_gnullvm=aarch64-w64-mingw32-clang++ + +ENV HOST=aarch64-pc-windows-gnullvm + +# We are bootstrapping this target and cannot use previously built artifacts. +# Without this option Clang is given `"-I/checkout/obj/build/aarch64-pc-windows-gnullvm/ci-llvm/include"` +# despite no such directory existing: +# $ ls obj/dist-windows-gnullvm/build/aarch64-pc-windows-gnullvm/ -1 +# llvm +# stage2 +ENV NO_DOWNLOAD_CI_LLVM 1 + +ENV RUST_CONFIGURE_ARGS \ + --enable-extended \ + --enable-profiler \ + --enable-sanitizers \ + --disable-docs \ + --set llvm.download-ci-llvm=false \ + --set rust.llvm-tools=false +# LLVM cross tools are not installed into expected location so copying fails. +# Probably will solve itself once this target can host itself on Windows. +# --enable-full-tools \ + +ENV SCRIPT python3 ../x.py dist --host $HOST --target $HOST diff --git a/src/ci/docker/host-x86_64/dist-various-1/Dockerfile b/src/ci/docker/host-x86_64/dist-various-1/Dockerfile index 00552db4b0144..af48e9bcfde9d 100644 --- a/src/ci/docker/host-x86_64/dist-various-1/Dockerfile +++ b/src/ci/docker/host-x86_64/dist-various-1/Dockerfile @@ -114,9 +114,6 @@ ENV TARGETS=$TARGETS,armv7r-none-eabi ENV TARGETS=$TARGETS,armv7r-none-eabihf ENV TARGETS=$TARGETS,thumbv7neon-unknown-linux-gnueabihf ENV TARGETS=$TARGETS,armv7a-none-eabi -ENV TARGETS=$TARGETS,aarch64-pc-windows-gnullvm -ENV TARGETS=$TARGETS,i686-pc-windows-gnullvm -ENV TARGETS=$TARGETS,x86_64-pc-windows-gnullvm ENV CFLAGS_armv5te_unknown_linux_musleabi="-march=armv5te -marm -mfloat-abi=soft" \ CFLAGS_arm_unknown_linux_musleabi="-march=armv6 -marm" \ @@ -148,10 +145,7 @@ ENV CFLAGS_armv5te_unknown_linux_musleabi="-march=armv5te -marm -mfloat-abi=soft CC_riscv64imac_unknown_none_elf=riscv64-unknown-elf-gcc \ CFLAGS_riscv64imac_unknown_none_elf=-march=rv64imac -mabi=lp64 \ CC_riscv64gc_unknown_none_elf=riscv64-unknown-elf-gcc \ - CFLAGS_riscv64gc_unknown_none_elf=-march=rv64gc -mabi=lp64 \ - CC_aarch64_pc_windows_gnullvm=aarch64-w64-mingw32-clang \ - CC_i686_pc_windows_gnullvm=i686-w64-mingw32-clang \ - CC_x86_64_pc_windows_gnullvm=x86_64-w64-mingw32-clang + CFLAGS_riscv64gc_unknown_none_elf=-march=rv64gc -mabi=lp64 ENV RUST_CONFIGURE_ARGS \ --musl-root-armv5te=/musl-armv5te \ diff --git a/src/ci/docker/host-x86_64/dist-various-1/install-llvm-mingw.sh b/src/ci/docker/host-x86_64/dist-various-1/install-llvm-mingw.sh deleted file mode 100755 index 95471895fe771..0000000000000 --- a/src/ci/docker/host-x86_64/dist-various-1/install-llvm-mingw.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -set -ex - -release_date=20240404 
-archive=llvm-mingw-${release_date}-ucrt-ubuntu-20.04-x86_64.tar.xz -curl -L https://github.com/mstorsjo/llvm-mingw/releases/download/${release_date}/${archive} | \ -tar --extract --lzma --strip 1 --directory /usr/local diff --git a/src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/Dockerfile b/src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/Dockerfile new file mode 100644 index 0000000000000..1ee3951beb563 --- /dev/null +++ b/src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/Dockerfile @@ -0,0 +1,50 @@ +FROM ubuntu:24.04 + +WORKDIR /build + +ARG DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + cmake \ + curl \ + g++ \ + git \ + make \ + ninja-build \ + python3 \ + xz-utils + +ENV ARCH='i686 x86_64' +COPY host-x86_64/dist-x86_64-windows-gnullvm/install-llvm-mingw.sh /build +RUN ./install-llvm-mingw.sh + +COPY scripts/sccache.sh /scripts/ +RUN sh /scripts/sccache.sh + +ENV CC_i686_pc_windows_gnullvm=i686-w64-mingw32-clang \ + CC_x86_64_pc_windows_gnullvm=x86_64-w64-mingw32-clang \ + CXX_x86_64_pc_windows_gnullvm=x86_64-w64-mingw32-clang++ + +ENV HOST=x86_64-pc-windows-gnullvm +ENV TARGETS=i686-pc-windows-gnullvm,x86_64-pc-windows-gnullvm + +# We are bootstrapping this target and cannot use previously built artifacts. +# Without this option Clang is given `"-I/checkout/obj/build/aarch64-pc-windows-gnullvm/ci-llvm/include"` +# despite no such directory existing: +# $ ls obj/dist-windows-gnullvm/build/aarch64-pc-windows-gnullvm/ -1 +# llvm +# stage2 +ENV NO_DOWNLOAD_CI_LLVM 1 + +ENV RUST_CONFIGURE_ARGS \ + --enable-extended \ + --enable-profiler \ + --enable-sanitizers \ + --disable-docs \ + --set llvm.download-ci-llvm=false \ + --set rust.llvm-tools=false +# LLVM cross tools are not installed into expected location so copying fails. +# Probably will solve itself once these targets can host themselves on Windows. +# --enable-full-tools \ + +ENV SCRIPT python3 ../x.py dist --host $HOST --target $TARGETS diff --git a/src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/install-llvm-mingw.sh b/src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/install-llvm-mingw.sh new file mode 100755 index 0000000000000..0ea5dae3ffbd9 --- /dev/null +++ b/src/ci/docker/host-x86_64/dist-x86_64-windows-gnullvm/install-llvm-mingw.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +set -ex + +release_date=20250613 +archive=llvm-mingw-${release_date}-ucrt-ubuntu-22.04-x86_64.tar.xz +curl -L https://github.com/mstorsjo/llvm-mingw/releases/download/${release_date}/${archive} | \ +tar --extract --xz --strip 1 --directory /usr/local + +# https://github.com/mstorsjo/llvm-mingw/issues/493 +for arch in $ARCH; do + ln -s $arch-w64-windows-gnu.cfg /usr/local/bin/$arch-pc-windows-gnu.cfg +done diff --git a/src/ci/github-actions/jobs.yml b/src/ci/github-actions/jobs.yml index 217cf764afbb1..e996a8f7139d3 100644 --- a/src/ci/github-actions/jobs.yml +++ b/src/ci/github-actions/jobs.yml @@ -237,6 +237,12 @@ auto: - name: dist-s390x-linux <<: *job-linux-4c + - name: dist-aarch64-windows-gnullvm + <<: *job-linux-4c + + - name: dist-x86_64-windows-gnullvm + <<: *job-linux-4c + - name: dist-various-1 <<: *job-linux-4c diff --git a/src/doc/rustc/src/platform-support.md b/src/doc/rustc/src/platform-support.md index 3cab57df75a82..285b1e519b7cc 100644 --- a/src/doc/rustc/src/platform-support.md +++ b/src/doc/rustc/src/platform-support.md @@ -88,6 +88,7 @@ so Rustup may install the documentation for a similar tier 1 target instead. 
target | notes -------|------- +[`aarch64-pc-windows-gnullvm`](platform-support/windows-gnullvm.md) | ARM64 MinGW (Windows 10+), LLVM ABI [`aarch64-pc-windows-msvc`](platform-support/windows-msvc.md) | ARM64 Windows MSVC `aarch64-unknown-linux-musl` | ARM64 Linux with musl 1.2.3 [`aarch64-unknown-linux-ohos`](platform-support/openharmony.md) | ARM64 OpenHarmony @@ -105,6 +106,7 @@ target | notes [`riscv64gc-unknown-linux-gnu`](platform-support/riscv64gc-unknown-linux-gnu.md) | RISC-V Linux (kernel 4.20+, glibc 2.29) [`riscv64gc-unknown-linux-musl`](platform-support/riscv64gc-unknown-linux-musl.md) | RISC-V Linux (kernel 4.20+, musl 1.2.3) [`s390x-unknown-linux-gnu`](platform-support/s390x-unknown-linux-gnu.md) | S390x Linux (kernel 3.2+, glibc 2.17) +[`x86_64-pc-windows-gnullvm`](platform-support/windows-gnullvm.md) | 64-bit x86 MinGW (Windows 10+), LLVM ABI [`x86_64-unknown-freebsd`](platform-support/freebsd.md) | 64-bit x86 FreeBSD [`x86_64-unknown-illumos`](platform-support/illumos.md) | illumos `x86_64-unknown-linux-musl` | 64-bit Linux with musl 1.2.3 @@ -147,7 +149,6 @@ target | std | notes [`aarch64-apple-ios-macabi`](platform-support/apple-ios-macabi.md) | ✓ | Mac Catalyst on ARM64 [`aarch64-apple-ios-sim`](platform-support/apple-ios.md) | ✓ | Apple iOS Simulator on ARM64 [`aarch64-linux-android`](platform-support/android.md) | ✓ | ARM64 Android -[`aarch64-pc-windows-gnullvm`](platform-support/windows-gnullvm.md) | ✓ | ARM64 MinGW (Windows 10+), LLVM ABI [`aarch64-unknown-fuchsia`](platform-support/fuchsia.md) | ✓ | ARM64 Fuchsia `aarch64-unknown-none` | * | Bare ARM64, hardfloat `aarch64-unknown-none-softfloat` | * | Bare ARM64, softfloat @@ -204,7 +205,6 @@ target | std | notes [`x86_64-apple-ios-macabi`](platform-support/apple-ios-macabi.md) | ✓ | Mac Catalyst on x86_64 [`x86_64-fortanix-unknown-sgx`](platform-support/x86_64-fortanix-unknown-sgx.md) | ✓ | [Fortanix ABI] for 64-bit Intel SGX [`x86_64-linux-android`](platform-support/android.md) | ✓ | 64-bit x86 Android -[`x86_64-pc-windows-gnullvm`](platform-support/windows-gnullvm.md) | ✓ | 64-bit x86 MinGW (Windows 10+), LLVM ABI [`x86_64-unknown-fuchsia`](platform-support/fuchsia.md) | ✓ | 64-bit x86 Fuchsia `x86_64-unknown-linux-gnux32` | ✓ | 64-bit Linux (x32 ABI) (kernel 4.15+, glibc 2.27) [`x86_64-unknown-none`](platform-support/x86_64-unknown-none.md) | * | Freestanding/bare-metal x86_64, softfloat diff --git a/src/doc/rustc/src/platform-support/windows-gnullvm.md b/src/doc/rustc/src/platform-support/windows-gnullvm.md index f3dc73165ac7e..b469af0153130 100644 --- a/src/doc/rustc/src/platform-support/windows-gnullvm.md +++ b/src/doc/rustc/src/platform-support/windows-gnullvm.md @@ -1,6 +1,6 @@ # \*-windows-gnullvm -**Tier: 2 (without host tools)** +**Tier: 2 (with host tools)** Windows targets similar to `*-windows-gnu` but using UCRT as the runtime and various LLVM tools/libraries instead of GCC/Binutils. diff --git a/src/doc/unstable-book/src/compiler-flags/hint-mostly-unused.md b/src/doc/unstable-book/src/compiler-flags/hint-mostly-unused.md new file mode 100644 index 0000000000000..80f5b1c44500d --- /dev/null +++ b/src/doc/unstable-book/src/compiler-flags/hint-mostly-unused.md @@ -0,0 +1,33 @@ +# `hint-mostly-unused` + +This flag hints to the compiler that most of the crate will probably go unused. +The compiler can optimize its operation based on this assumption, in order to +compile faster. This is a hint, and does not guarantee any particular behavior. 
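For illustration only (not part of the patch), here is a sketch of the kind of dependency this hint targets: a crate that exports many functions of which downstream crates call only a few. The crate layout and function names below are hypothetical.

```rust
// Hypothetical dependency compiled with `-Zhint-mostly-unused`.
// Downstream crates only ever call `parse`, so under the hint the compiler
// defers code generation for the remaining functions to whichever crate
// eventually calls them; if nothing does, no code is generated for them.
pub fn parse(input: &str) -> usize {
    input.len()
}

pub fn render(value: usize) -> String {
    value.to_string()
}

// As the flag's documentation notes, `extern` and `#[inline(never)]`
// functions are exceptions: they still get code generated in this crate.
#[inline(never)]
pub fn always_generated() {}
```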
+ +This option can substantially speed up compilation if applied to a large +dependency where the majority of the dependency does not get used. This flag +may slow down compilation in other cases. + +Currently, this option makes the compiler defer as much code generation as +possible from functions in the crate, until later crates invoke those +functions. Functions that never get invoked will never have code generated for +them. For instance, if a crate provides thousands of functions, but only a few +of them will get called, this flag will result in the compiler only doing code +generation for the called functions. (This uses the same mechanisms as +cross-crate inlining of functions.) This does not affect `extern` functions, or +functions marked as `#[inline(never)]`. + +To try applying this flag to one dependency out of a dependency tree, use the +[`profile-rustflags`](https://doc.rust-lang.org/cargo/reference/unstable.html#profile-rustflags-option) +feature of nightly cargo: + +```toml +cargo-features = ["profile-rustflags"] + +# ... +[dependencies] +mostly-unused-dependency = "1.2.3" + +[profile.release.package.mostly-unused-dependency] +rustflags = ["-Zhint-mostly-unused"] +``` diff --git a/src/librustdoc/json/conversions.rs b/src/librustdoc/json/conversions.rs index 6bdf3b5fe3876..b9df594dcca96 100644 --- a/src/librustdoc/json/conversions.rs +++ b/src/librustdoc/json/conversions.rs @@ -170,10 +170,13 @@ pub(crate) fn from_deprecation(deprecation: attrs::Deprecation) -> Deprecation { Deprecation { since, note: note.map(|s| s.to_string()) } } -impl FromClean for GenericArgs { +impl FromClean for Option> { fn from_clean(args: &clean::GenericArgs, renderer: &JsonRenderer<'_>) -> Self { use clean::GenericArgs::*; - match args { + if args.is_empty() { + return None; + } + Some(Box::new(match args { AngleBracketed { args, constraints } => GenericArgs::AngleBracketed { args: args.into_json(renderer), constraints: constraints.into_json(renderer), @@ -183,7 +186,7 @@ impl FromClean for GenericArgs { output: output.as_ref().map(|a| a.as_ref().into_json(renderer)), }, ReturnTypeNotation => GenericArgs::ReturnTypeNotation, - } + })) } } @@ -580,7 +583,20 @@ impl FromClean for Path { Path { path: path.whole_name(), id: renderer.id_from_item_default(path.def_id().into()), - args: path.segments.last().map(|args| Box::new(args.args.into_json(renderer))), + args: { + if let Some((final_seg, rest_segs)) = path.segments.split_last() { + // In general, `clean::Path` can hold things like + // `std::vec::Vec::::new`, where generic args appear + // in a middle segment. But for the places where `Path` is + // used by rustdoc-json-types, generic args can only be + // used in the final segment, e.g. `std::vec::Vec`. So + // check that the non-final segments have no generic args. 
+ assert!(rest_segs.iter().all(|seg| seg.args.is_empty())); + final_seg.args.into_json(renderer) + } else { + None // no generics on any segments because there are no segments + } + }, } } } @@ -591,7 +607,7 @@ impl FromClean for Type { Self::QualifiedPath { name: assoc.name.to_string(), - args: Box::new(assoc.args.into_json(renderer)), + args: assoc.args.into_json(renderer), self_type: Box::new(self_type.into_json(renderer)), trait_: trait_.as_ref().map(|trait_| trait_.into_json(renderer)), } diff --git a/src/rustdoc-json-types/lib.rs b/src/rustdoc-json-types/lib.rs index 2c94b8f914f7d..cf9b86b967c0f 100644 --- a/src/rustdoc-json-types/lib.rs +++ b/src/rustdoc-json-types/lib.rs @@ -37,8 +37,8 @@ pub type FxHashMap = HashMap; // re-export for use in src/librustdoc // will instead cause conflicts. See #94591 for more. (This paragraph and the "Latest feature" line // are deliberately not in a doc comment, because they need not be in public docs.) // -// Latest feature: rustdoc JSON: Don't apply #[repr] privacy heuristics -pub const FORMAT_VERSION: u32 = 46; +// Latest feature: improve handling of generic args +pub const FORMAT_VERSION: u32 = 47; /// The root of the emitted JSON blob. /// @@ -277,8 +277,8 @@ pub struct PolyTrait { /// A set of generic arguments provided to a path segment, e.g. /// /// ```text -/// std::option::Option::::None -/// ^^^^^ +/// std::option::Option +/// ^^^^^ /// ``` #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] @@ -331,7 +331,7 @@ pub enum GenericArg { Const(Constant), /// A generic argument that's explicitly set to be inferred. /// ```text - /// std::vec::Vec::<_>::new() + /// std::vec::Vec::<_> /// ^ /// ``` Infer, @@ -362,7 +362,7 @@ pub struct AssocItemConstraint { /// The name of the associated type/constant. pub name: String, /// Arguments provided to the associated type/constant. - pub args: GenericArgs, + pub args: Option>, /// The kind of bound applied to the associated type/constant. pub binding: AssocItemConstraintKind, } @@ -1118,7 +1118,7 @@ pub enum Type { /// as BetterIterator>::Item<'static> /// // ^^^^^^^^^ /// ``` - args: Box, + args: Option>, /// The type with which this type is associated. /// /// ```ignore (incomplete expression) diff --git a/src/rustdoc-json-types/tests.rs b/src/rustdoc-json-types/tests.rs index b9363fcf1b714..c50eb61650126 100644 --- a/src/rustdoc-json-types/tests.rs +++ b/src/rustdoc-json-types/tests.rs @@ -38,3 +38,28 @@ fn test_union_info_roundtrip() { let decoded: ItemEnum = bincode::deserialize(&encoded).unwrap(); assert_eq!(u, decoded); } + +// The memory used by a `Crate` can get large. These types are the ones that +// contribute the most to its size. 
+#[test] +#[cfg(target_pointer_width = "64")] +fn test_type_sizes() { + // tidy-alphabetical-start + assert_eq!(size_of::(), 112); + assert_eq!(size_of::(), 184); + assert_eq!(size_of::(), 48); + assert_eq!(size_of::(), 168); + assert_eq!(size_of::(), 80); + assert_eq!(size_of::(), 104); + assert_eq!(size_of::(), 72); + assert_eq!(size_of::(), 136); + assert_eq!(size_of::(), 304); + assert_eq!(size_of::(), 552); + assert_eq!(size_of::(), 32); + assert_eq!(size_of::(), 64); + assert_eq!(size_of::(), 32); + assert_eq!(size_of::(), 80); + assert_eq!(size_of::(), 80); + assert_eq!(size_of::(), 160); + // tidy-alphabetical-end +} diff --git a/src/tools/clippy/tests/ui/single_range_in_vec_init.rs b/src/tools/clippy/tests/ui/single_range_in_vec_init.rs index 25884450b0842..0888019e101ce 100644 --- a/src/tools/clippy/tests/ui/single_range_in_vec_init.rs +++ b/src/tools/clippy/tests/ui/single_range_in_vec_init.rs @@ -2,7 +2,6 @@ //@no-rustfix: overlapping suggestions #![allow(clippy::no_effect, clippy::unnecessary_operation, clippy::useless_vec, unused)] #![warn(clippy::single_range_in_vec_init)] -#![feature(generic_arg_infer)] #[macro_use] extern crate proc_macros; diff --git a/src/tools/clippy/tests/ui/single_range_in_vec_init.stderr b/src/tools/clippy/tests/ui/single_range_in_vec_init.stderr index a99127a7606fb..b21338e38a3cb 100644 --- a/src/tools/clippy/tests/ui/single_range_in_vec_init.stderr +++ b/src/tools/clippy/tests/ui/single_range_in_vec_init.stderr @@ -1,5 +1,5 @@ error: an array of `Range` that is only one element - --> tests/ui/single_range_in_vec_init.rs:26:5 + --> tests/ui/single_range_in_vec_init.rs:25:5 | LL | [0..200]; | ^^^^^^^^ @@ -18,7 +18,7 @@ LL + [0; 200]; | error: a `Vec` of `Range` that is only one element - --> tests/ui/single_range_in_vec_init.rs:28:5 + --> tests/ui/single_range_in_vec_init.rs:27:5 | LL | vec![0..200]; | ^^^^^^^^^^^^ @@ -35,7 +35,7 @@ LL + vec![0; 200]; | error: an array of `Range` that is only one element - --> tests/ui/single_range_in_vec_init.rs:30:5 + --> tests/ui/single_range_in_vec_init.rs:29:5 | LL | [0u8..200]; | ^^^^^^^^^^ @@ -52,7 +52,7 @@ LL + [0u8; 200]; | error: an array of `Range` that is only one element - --> tests/ui/single_range_in_vec_init.rs:32:5 + --> tests/ui/single_range_in_vec_init.rs:31:5 | LL | [0usize..200]; | ^^^^^^^^^^^^^ @@ -69,7 +69,7 @@ LL + [0usize; 200]; | error: an array of `Range` that is only one element - --> tests/ui/single_range_in_vec_init.rs:34:5 + --> tests/ui/single_range_in_vec_init.rs:33:5 | LL | [0..200usize]; | ^^^^^^^^^^^^^ @@ -86,7 +86,7 @@ LL + [0; 200usize]; | error: a `Vec` of `Range` that is only one element - --> tests/ui/single_range_in_vec_init.rs:36:5 + --> tests/ui/single_range_in_vec_init.rs:35:5 | LL | vec![0u8..200]; | ^^^^^^^^^^^^^^ @@ -103,7 +103,7 @@ LL + vec![0u8; 200]; | error: a `Vec` of `Range` that is only one element - --> tests/ui/single_range_in_vec_init.rs:38:5 + --> tests/ui/single_range_in_vec_init.rs:37:5 | LL | vec![0usize..200]; | ^^^^^^^^^^^^^^^^^ @@ -120,7 +120,7 @@ LL + vec![0usize; 200]; | error: a `Vec` of `Range` that is only one element - --> tests/ui/single_range_in_vec_init.rs:40:5 + --> tests/ui/single_range_in_vec_init.rs:39:5 | LL | vec![0..200usize]; | ^^^^^^^^^^^^^^^^^ @@ -137,7 +137,7 @@ LL + vec![0; 200usize]; | error: an array of `Range` that is only one element - --> tests/ui/single_range_in_vec_init.rs:43:5 + --> tests/ui/single_range_in_vec_init.rs:42:5 | LL | [0..200isize]; | ^^^^^^^^^^^^^ @@ -149,7 +149,7 @@ LL + (0..200isize).collect::>(); | error: a 
`Vec` of `Range` that is only one element - --> tests/ui/single_range_in_vec_init.rs:45:5 + --> tests/ui/single_range_in_vec_init.rs:44:5 | LL | vec![0..200isize]; | ^^^^^^^^^^^^^^^^^ diff --git a/src/tools/jsondoclint/src/validator.rs b/src/tools/jsondoclint/src/validator.rs index 8c9e4c8bb3a60..0a4051fcbe8cd 100644 --- a/src/tools/jsondoclint/src/validator.rs +++ b/src/tools/jsondoclint/src/validator.rs @@ -271,7 +271,7 @@ impl<'a> Validator<'a> { Type::RawPointer { is_mutable: _, type_ } => self.check_type(&**type_), Type::BorrowedRef { lifetime: _, is_mutable: _, type_ } => self.check_type(&**type_), Type::QualifiedPath { name: _, args, self_type, trait_ } => { - self.check_generic_args(&**args); + self.check_opt_generic_args(&args); self.check_type(&**self_type); if let Some(trait_) = trait_ { self.check_path(trait_, PathKind::Trait); @@ -309,13 +309,12 @@ impl<'a> Validator<'a> { self.fail(&x.id, ErrorKind::Custom(format!("No entry in '$.paths' for {x:?}"))); } - if let Some(args) = &x.args { - self.check_generic_args(&**args); - } + self.check_opt_generic_args(&x.args); } - fn check_generic_args(&mut self, x: &'a GenericArgs) { - match x { + fn check_opt_generic_args(&mut self, x: &'a Option>) { + let Some(x) = x else { return }; + match &**x { GenericArgs::AngleBracketed { args, constraints } => { args.iter().for_each(|arg| self.check_generic_arg(arg)); constraints.iter().for_each(|bind| self.check_assoc_item_constraint(bind)); @@ -355,7 +354,7 @@ impl<'a> Validator<'a> { } fn check_assoc_item_constraint(&mut self, bind: &'a AssocItemConstraint) { - self.check_generic_args(&bind.args); + self.check_opt_generic_args(&bind.args); match &bind.binding { AssocItemConstraintKind::Equality(term) => self.check_term(term), AssocItemConstraintKind::Constraint(bounds) => { diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index 170dcd626a285..716d42c32ebed 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -356,6 +356,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "rand", "rand_chacha", "rand_core", + "rand_xorshift", // dependency for doc-tests in rustc_thread_pool "rand_xoshiro", "redox_syscall", "regex", @@ -364,7 +365,6 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "rustc-demangle", "rustc-hash", "rustc-literal-escaper", - "rustc-rayon-core", "rustc-stable-hash", "rustc_apfloat", "rustix", diff --git a/tests/codegen/frame-pointer-cli-control.rs b/tests/codegen/frame-pointer-cli-control.rs new file mode 100644 index 0000000000000..a65dd132763de --- /dev/null +++ b/tests/codegen/frame-pointer-cli-control.rs @@ -0,0 +1,61 @@ +//@ add-core-stubs +//@ compile-flags: --crate-type=rlib -Copt-level=0 +//@ revisions: force-on aarch64-apple aarch64-apple-on aarch64-apple-off +//@ [force-on] compile-flags: -Cforce-frame-pointers=on +//@ [aarch64-apple] needs-llvm-components: aarch64 +//@ [aarch64-apple] compile-flags: --target=aarch64-apple-darwin +//@ [aarch64-apple-on] needs-llvm-components: aarch64 +//@ [aarch64-apple-on] compile-flags: --target=aarch64-apple-darwin -Cforce-frame-pointers=on +//@ [aarch64-apple-off] needs-llvm-components: aarch64 +//@ [aarch64-apple-off] compile-flags: --target=aarch64-apple-darwin -Cforce-frame-pointers=off +/*! + +Tests the extent to which frame pointers can be controlled by the CLI. 
+The behavior of our frame pointer options, at present, is an irreversible ratchet, where +a "weaker" option that allows omitting frame pointers may be overridden by the target demanding +that all code (or all non-leaf code, more often) must be compiled with frame pointers. +This was discussed on 2025-05-22 in the T-compiler meeting and accepted as an intentional change, +ratifying the prior decisions by compiler contributors and reviewers as correct, +though it was also acknowledged that the flag allows somewhat confusing inputs. + +We find aarch64-apple-darwin useful because of its icy-clear policy regarding frame pointers, +e.g. says: + +* The frame pointer register (x29) must always address a valid frame record. Some functions — + such as leaf functions or tail calls — may opt not to create an entry in this list. + As a result, stack traces are always meaningful, even without debug information. + +Many Rust fn, if externally visible, may be expected to follow target ABI by tools or asm code! +This can make it a problem to generate ABI-incorrect code, which may mean "with frame pointers". +For this and other reasons, `-Cforce-frame-pointers=off` cannot override the target definition. +This can cause some confusion because it is "reverse polarity" relative to C compilers, which have +commands like `-fomit-frame-pointer`, `-fomit-leaf-frame-pointer`, or `-fno-omit-frame-pointer`! + +Specific cases where platforms or tools rely on frame pointers for sound or correct unwinding: +- illumos: +- aarch64-windows: +- aarch64-linux: +- dtrace (freebsd and openbsd): +- openbsd: +- i686-msvc +- i686-mingw: +*/ +#![feature(no_core, lang_items)] +#![no_core] + +extern crate minicore; + +// CHECK: i32 @peach{{.*}}[[PEACH_ATTRS:\#[0-9]+]] { +#[no_mangle] +pub fn peach(x: u32) -> u32 { + x +} + +// CHECK: attributes [[PEACH_ATTRS]] = { +// force-on-SAME: {{.*}}"frame-pointer"="all" +// aarch64-apple-SAME: {{.*}}"frame-pointer"="non-leaf" +// aarch64-apple-on-SAME: {{.*}}"frame-pointer"="all" +// +// yes, we are testing this doesn't do anything: +// aarch64-apple-off-SAME: {{.*}}"frame-pointer"="non-leaf" +// CHECK-SAME: } diff --git a/tests/crashes/111419.rs b/tests/crashes/111419.rs index 3a1a13df1985a..36f15e1d0a26e 100644 --- a/tests/crashes/111419.rs +++ b/tests/crashes/111419.rs @@ -1,6 +1,6 @@ //@ known-bug: #111419 #![allow(incomplete_features)] -#![feature(generic_const_exprs, generic_arg_infer)] +#![feature(generic_const_exprs)] pub trait Example where diff --git a/tests/mir-opt/copy-prop/write_to_borrowed.main.CopyProp.diff b/tests/mir-opt/copy-prop/write_to_borrowed.main.CopyProp.diff new file mode 100644 index 0000000000000..eab06b1ba1e79 --- /dev/null +++ b/tests/mir-opt/copy-prop/write_to_borrowed.main.CopyProp.diff @@ -0,0 +1,30 @@ +- // MIR for `main` before CopyProp ++ // MIR for `main` after CopyProp + + fn main() -> () { + let mut _0: (); + let mut _1: *const char; + let mut _2: char; + let mut _3: char; + let mut _4: char; + let mut _5: char; + let mut _6: &char; + let mut _7: (); + + bb0: { + _1 = &raw const _2; + _3 = const 'b'; + _5 = copy _3; + _6 = &_3; +- _4 = copy _5; + (*_1) = copy (*_6); + _6 = &_5; +- _7 = dump_var::(copy _4) -> [return: bb1, unwind unreachable]; ++ _7 = dump_var::(copy _5) -> [return: bb1, unwind unreachable]; + } + + bb1: { + return; + } + } + diff --git a/tests/mir-opt/copy-prop/write_to_borrowed.rs b/tests/mir-opt/copy-prop/write_to_borrowed.rs new file mode 100644 index 0000000000000..58809749103e1 --- /dev/null +++ 
b/tests/mir-opt/copy-prop/write_to_borrowed.rs @@ -0,0 +1,45 @@ +//@ test-mir-pass: CopyProp + +#![feature(custom_mir, core_intrinsics)] +#![allow(internal_features)] + +use std::intrinsics::mir::*; + +#[custom_mir(dialect = "runtime")] +fn main() { + mir! { + // Both _3 and _5 are borrowed, check that we do not unify them, and that we do not + // introduce a write to any of them. + let _1; + let _2; + let _3; + let _4; + let _5; + let _6; + let _7; + // CHECK: bb0: { + { + // CHECK-NEXT: _1 = &raw const _2; + _1 = core::ptr::addr_of!(_2); + // CHECK-NEXT: _3 = const 'b'; + _3 = 'b'; + // CHECK-NEXT: _5 = copy _3; + _5 = _3; + // CHECK-NEXT: _6 = &_3; + _6 = &_3; + // CHECK-NOT: {{_.*}} = {{_.*}}; + _4 = _5; + // CHECK-NEXT: (*_1) = copy (*_6); + *_1 = *_6; + // CHECK-NEXT: _6 = &_5; + _6 = &_5; + // CHECK-NEXT: _7 = dump_var::(copy _5) + Call(_7 = dump_var(_4), ReturnTo(bb1), UnwindUnreachable()) + } + bb1 = { Return() } + } +} + +fn dump_var(_: T) {} + +// EMIT_MIR write_to_borrowed.main.CopyProp.diff diff --git a/tests/rustdoc-json/generic-args.rs b/tests/rustdoc-json/generic-args.rs new file mode 100644 index 0000000000000..0f588820da75d --- /dev/null +++ b/tests/rustdoc-json/generic-args.rs @@ -0,0 +1,20 @@ +pub struct MyStruct(u32); + +pub trait MyTrait { + type MyType; + fn my_fn(&self); +} + +impl MyTrait for MyStruct { + type MyType = u32; + fn my_fn(&self) {} +} + +//@ is "$.index[?(@.name=='my_fn1')].inner.function.sig.inputs[0][1].qualified_path.args" null +//@ is "$.index[?(@.name=='my_fn1')].inner.function.sig.inputs[0][1].qualified_path.self_type.resolved_path.args" null +pub fn my_fn1(_: ::MyType) {} + +//@ is "$.index[?(@.name=='my_fn2')].inner.function.sig.inputs[0][1].dyn_trait.traits[0].trait.args.angle_bracketed.constraints[0].args" null +pub fn my_fn2(_: IntoIterator) {} + +fn main() {} diff --git a/tests/ui/array-slice-vec/suggest-array-length.fixed b/tests/ui/array-slice-vec/suggest-array-length.fixed index 2eacc2517d310..ae1c6583c238b 100644 --- a/tests/ui/array-slice-vec/suggest-array-length.fixed +++ b/tests/ui/array-slice-vec/suggest-array-length.fixed @@ -10,14 +10,9 @@ fn main() { //~^ ERROR the placeholder `_` is not allowed within types on item signatures for static variables static REF_STATIK: &[u8; 1] = &[1]; //~^ ERROR the placeholder `_` is not allowed within types on item signatures for static variables - let foo: [i32; 3] = [1, 2, 3]; - //~^ ERROR using `_` for array lengths is unstable - let bar: [i32; 3] = [0; 3]; - //~^ ERROR using `_` for array lengths is unstable - let ref_foo: &[i32; 3] = &[1, 2, 3]; - //~^ ERROR using `_` for array lengths is unstable - let ref_bar: &[i32; 3] = &[0; 3]; - //~^ ERROR using `_` for array lengths is unstable - let multiple_ref_foo: &&[i32; 3] = &&[1, 2, 3]; - //~^ ERROR using `_` for array lengths is unstable + let foo: [i32; _] = [1, 2, 3]; + let bar: [i32; _] = [0; 3]; + let ref_foo: &[i32; _] = &[1, 2, 3]; + let ref_bar: &[i32; _] = &[0; 3]; + let multiple_ref_foo: &&[i32; _] = &&[1, 2, 3]; } diff --git a/tests/ui/array-slice-vec/suggest-array-length.rs b/tests/ui/array-slice-vec/suggest-array-length.rs index fb4424cfed99d..e53118014b2a4 100644 --- a/tests/ui/array-slice-vec/suggest-array-length.rs +++ b/tests/ui/array-slice-vec/suggest-array-length.rs @@ -11,13 +11,8 @@ fn main() { static REF_STATIK: &[u8; _] = &[1]; //~^ ERROR the placeholder `_` is not allowed within types on item signatures for static variables let foo: [i32; _] = [1, 2, 3]; - //~^ ERROR using `_` for array lengths is unstable let bar: [i32; 
_] = [0; 3]; - //~^ ERROR using `_` for array lengths is unstable let ref_foo: &[i32; _] = &[1, 2, 3]; - //~^ ERROR using `_` for array lengths is unstable let ref_bar: &[i32; _] = &[0; 3]; - //~^ ERROR using `_` for array lengths is unstable let multiple_ref_foo: &&[i32; _] = &&[1, 2, 3]; - //~^ ERROR using `_` for array lengths is unstable } diff --git a/tests/ui/array-slice-vec/suggest-array-length.stderr b/tests/ui/array-slice-vec/suggest-array-length.stderr index 14d10832e3608..e498f2ca4f580 100644 --- a/tests/ui/array-slice-vec/suggest-array-length.stderr +++ b/tests/ui/array-slice-vec/suggest-array-length.stderr @@ -46,57 +46,6 @@ LL - static REF_STATIK: &[u8; _] = &[1]; LL + static REF_STATIK: &[u8; 1] = &[1]; | -error[E0658]: using `_` for array lengths is unstable - --> $DIR/suggest-array-length.rs:13:20 - | -LL | let foo: [i32; _] = [1, 2, 3]; - | ^ help: consider specifying the array length: `3` - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: using `_` for array lengths is unstable - --> $DIR/suggest-array-length.rs:15:20 - | -LL | let bar: [i32; _] = [0; 3]; - | ^ help: consider specifying the array length: `3` - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: using `_` for array lengths is unstable - --> $DIR/suggest-array-length.rs:17:25 - | -LL | let ref_foo: &[i32; _] = &[1, 2, 3]; - | ^ help: consider specifying the array length: `3` - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: using `_` for array lengths is unstable - --> $DIR/suggest-array-length.rs:19:25 - | -LL | let ref_bar: &[i32; _] = &[0; 3]; - | ^ help: consider specifying the array length: `3` - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: using `_` for array lengths is unstable - --> $DIR/suggest-array-length.rs:21:35 - | -LL | let multiple_ref_foo: &&[i32; _] = &&[1, 2, 3]; - | ^ help: consider specifying the array length: `3` - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error: aborting due to 9 previous errors +error: aborting due to 4 previous errors -Some errors have detailed explanations: E0121, E0658. -For more information about an error, try `rustc --explain E0121`. +For more information about this error, try `rustc --explain E0121`. 
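The `E0658` diagnostics deleted from `suggest-array-length.stderr` above reflect that `_` is now accepted as an array length and inferred from the initializer, as the updated `.rs`/`.fixed` tests expect. A minimal sketch of the now-accepted form, assuming a toolchain that includes this change:

```rust
fn main() {
    // The length is inferred from the number of elements in the initializer.
    let foo: [i32; _] = [1, 2, 3];
    // For a repeat expression, the length is taken from the repeat count.
    let bar: [i32; _] = [0; 3];
    assert_eq!(foo.len(), bar.len());
}
```

Note that `_` is still rejected in item signatures (statics, constants, return types), which is what the `E0121` errors remaining in the same test continue to cover.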
diff --git a/tests/ui/async-await/issues/issue-95307.rs b/tests/ui/async-await/issues/issue-95307.rs index 27903a667fb5c..83df65612b486 100644 --- a/tests/ui/async-await/issues/issue-95307.rs +++ b/tests/ui/async-await/issues/issue-95307.rs @@ -6,7 +6,6 @@ pub trait C { async fn new() -> [u8; _]; //~^ ERROR: the placeholder `_` is not allowed within types on item signatures for functions - //~| ERROR using `_` for array lengths is unstable } fn main() {} diff --git a/tests/ui/async-await/issues/issue-95307.stderr b/tests/ui/async-await/issues/issue-95307.stderr index 90100f391637a..c670686f7c9d5 100644 --- a/tests/ui/async-await/issues/issue-95307.stderr +++ b/tests/ui/async-await/issues/issue-95307.stderr @@ -4,17 +4,6 @@ error[E0121]: the placeholder `_` is not allowed within types on item signatures LL | async fn new() -> [u8; _]; | ^ not allowed in type signatures -error[E0658]: using `_` for array lengths is unstable - --> $DIR/issue-95307.rs:7:28 - | -LL | async fn new() -> [u8; _]; - | ^ - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error: aborting due to 2 previous errors +error: aborting due to 1 previous error -Some errors have detailed explanations: E0121, E0658. -For more information about an error, try `rustc --explain E0121`. +For more information about this error, try `rustc --explain E0121`. diff --git a/tests/ui/closures/binder/forbid_ambig_const_infers.rs b/tests/ui/closures/binder/forbid_ambig_const_infers.rs index e9d783711ee3e..eb258e0ed9f75 100644 --- a/tests/ui/closures/binder/forbid_ambig_const_infers.rs +++ b/tests/ui/closures/binder/forbid_ambig_const_infers.rs @@ -1,4 +1,4 @@ -#![feature(generic_arg_infer, closure_lifetime_binder)] +#![feature(closure_lifetime_binder)] struct Foo([u32; N]); diff --git a/tests/ui/closures/binder/forbid_ambig_type_infers.rs b/tests/ui/closures/binder/forbid_ambig_type_infers.rs index 4e717ef3a179d..ca44a5db96d0f 100644 --- a/tests/ui/closures/binder/forbid_ambig_type_infers.rs +++ b/tests/ui/closures/binder/forbid_ambig_type_infers.rs @@ -1,4 +1,4 @@ -#![feature(generic_arg_infer, closure_lifetime_binder)] +#![feature(closure_lifetime_binder)] struct Foo(T); diff --git a/tests/ui/closures/binder/forbid_const_infer.rs b/tests/ui/closures/binder/forbid_const_infer.rs index f5b8bf188dfee..8c8f0456f503e 100644 --- a/tests/ui/closures/binder/forbid_const_infer.rs +++ b/tests/ui/closures/binder/forbid_const_infer.rs @@ -1,4 +1,4 @@ -#![feature(generic_arg_infer, closure_lifetime_binder)] +#![feature(closure_lifetime_binder)] fn main() { let c = for<'a> |b: &'a [u32; _]| -> u32 { b[0] }; diff --git a/tests/ui/const-generics/associated_const_equality/equality_bound_with_infer.rs b/tests/ui/const-generics/associated_const_equality/equality_bound_with_infer.rs index f45b7c3268b17..dc42e00c2e834 100644 --- a/tests/ui/const-generics/associated_const_equality/equality_bound_with_infer.rs +++ b/tests/ui/const-generics/associated_const_equality/equality_bound_with_infer.rs @@ -1,4 +1,4 @@ -#![feature(generic_arg_infer, associated_const_equality, generic_const_items)] +#![feature(associated_const_equality, generic_const_items)] #![expect(incomplete_features)] // Regression test for #133066 where we would try to evaluate `<() as Foo>::ASSOC<_>` even diff --git a/tests/ui/const-generics/generic_arg_infer/array-repeat-expr-lib.rs 
b/tests/ui/const-generics/generic_arg_infer/array-repeat-expr-lib.rs index c1f725db126a0..c3a6767011483 100644 --- a/tests/ui/const-generics/generic_arg_infer/array-repeat-expr-lib.rs +++ b/tests/ui/const-generics/generic_arg_infer/array-repeat-expr-lib.rs @@ -1,6 +1,5 @@ //@ check-pass -#![feature(generic_arg_infer)] #![crate_type = "lib"] // Test that encoding the hallucinated `DefId` for the `_` const argument doesn't diff --git a/tests/ui/const-generics/generic_arg_infer/array-repeat-expr.rs b/tests/ui/const-generics/generic_arg_infer/array-repeat-expr.rs index 34091badfa7f1..fff9f2cc94d84 100644 --- a/tests/ui/const-generics/generic_arg_infer/array-repeat-expr.rs +++ b/tests/ui/const-generics/generic_arg_infer/array-repeat-expr.rs @@ -1,7 +1,6 @@ //@ run-pass // To avoid having to `or` gate `_` as an expr. -#![feature(generic_arg_infer)] fn foo() -> [u8; 3] { let x: [u8; _] = [0; _]; diff --git a/tests/ui/const-generics/generic_arg_infer/dont-use-defaults.rs b/tests/ui/const-generics/generic_arg_infer/dont-use-defaults.rs index 613ea9da99da3..d4a1468c04965 100644 --- a/tests/ui/const-generics/generic_arg_infer/dont-use-defaults.rs +++ b/tests/ui/const-generics/generic_arg_infer/dont-use-defaults.rs @@ -1,7 +1,4 @@ //@ run-pass -#![feature(generic_arg_infer)] - -// test that we dont use defaults to aide in type inference struct Foo; impl Foo { diff --git a/tests/ui/const-generics/generic_arg_infer/in-signature.rs b/tests/ui/const-generics/generic_arg_infer/in-signature.rs index cd852a269435e..cd0235bf45aa8 100644 --- a/tests/ui/const-generics/generic_arg_infer/in-signature.rs +++ b/tests/ui/const-generics/generic_arg_infer/in-signature.rs @@ -1,5 +1,4 @@ #![crate_type = "rlib"] -#![feature(generic_arg_infer)] struct Foo; struct Bar(T); diff --git a/tests/ui/const-generics/generic_arg_infer/in-signature.stderr b/tests/ui/const-generics/generic_arg_infer/in-signature.stderr index 12d84268f955c..f964fc8d2f2c5 100644 --- a/tests/ui/const-generics/generic_arg_infer/in-signature.stderr +++ b/tests/ui/const-generics/generic_arg_infer/in-signature.stderr @@ -1,5 +1,5 @@ error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> $DIR/in-signature.rs:7:21 + --> $DIR/in-signature.rs:6:21 | LL | fn arr_fn() -> [u8; _] { | -----^- @@ -8,7 +8,7 @@ LL | fn arr_fn() -> [u8; _] { | help: replace with the correct return type: `[u8; 3]` error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> $DIR/in-signature.rs:12:24 + --> $DIR/in-signature.rs:11:24 | LL | fn ty_fn() -> Bar { | ---------^- @@ -17,7 +17,7 @@ LL | fn ty_fn() -> Bar { | help: replace with the correct return type: `Bar` error[E0121]: the placeholder `_` is not allowed within types on item signatures for return types - --> $DIR/in-signature.rs:17:25 + --> $DIR/in-signature.rs:16:25 | LL | fn ty_fn_mixed() -> Bar<_, _> { | ----^--^- @@ -27,7 +27,7 @@ LL | fn ty_fn_mixed() -> Bar<_, _> { | help: replace with the correct return type: `Bar` error[E0121]: the placeholder `_` is not allowed within types on item signatures for constants - --> $DIR/in-signature.rs:22:20 + --> $DIR/in-signature.rs:21:20 | LL | const ARR_CT: [u8; _] = [0; 3]; | ^ not allowed in type signatures @@ -39,7 +39,7 @@ LL + const ARR_CT: [u8; 3] = [0; 3]; | error[E0121]: the placeholder `_` is not allowed within types on item signatures for static variables - --> $DIR/in-signature.rs:24:25 + --> $DIR/in-signature.rs:23:25 | LL | static ARR_STATIC: [u8; _] = [0; 3]; | ^ not allowed in 
type signatures @@ -51,7 +51,7 @@ LL + static ARR_STATIC: [u8; 3] = [0; 3]; | error[E0121]: the placeholder `_` is not allowed within types on item signatures for constants - --> $DIR/in-signature.rs:26:23 + --> $DIR/in-signature.rs:25:23 | LL | const TY_CT: Bar = Bar::(0); | ^ not allowed in type signatures @@ -63,7 +63,7 @@ LL + const TY_CT: Bar = Bar::(0); | error[E0121]: the placeholder `_` is not allowed within types on item signatures for static variables - --> $DIR/in-signature.rs:28:28 + --> $DIR/in-signature.rs:27:28 | LL | static TY_STATIC: Bar = Bar::(0); | ^ not allowed in type signatures @@ -75,7 +75,7 @@ LL + static TY_STATIC: Bar = Bar::(0); | error[E0121]: the placeholder `_` is not allowed within types on item signatures for constants - --> $DIR/in-signature.rs:30:24 + --> $DIR/in-signature.rs:29:24 | LL | const TY_CT_MIXED: Bar<_, _> = Bar::(0); | ^ ^ not allowed in type signatures @@ -89,7 +89,7 @@ LL + const TY_CT_MIXED: Bar = Bar::(0); | error[E0121]: the placeholder `_` is not allowed within types on item signatures for static variables - --> $DIR/in-signature.rs:32:29 + --> $DIR/in-signature.rs:31:29 | LL | static TY_STATIC_MIXED: Bar<_, _> = Bar::(0); | ^ ^ not allowed in type signatures @@ -103,19 +103,19 @@ LL + static TY_STATIC_MIXED: Bar = Bar::(0); | error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types - --> $DIR/in-signature.rs:51:23 + --> $DIR/in-signature.rs:50:23 | LL | type Assoc = [u8; _]; | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types - --> $DIR/in-signature.rs:55:27 + --> $DIR/in-signature.rs:54:27 | LL | type Assoc = Bar; | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated types - --> $DIR/in-signature.rs:59:22 + --> $DIR/in-signature.rs:58:22 | LL | type Assoc = Bar<_, _>; | ^ ^ not allowed in type signatures @@ -123,19 +123,19 @@ LL | type Assoc = Bar<_, _>; | not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants - --> $DIR/in-signature.rs:35:21 + --> $DIR/in-signature.rs:34:21 | LL | const ARR: [u8; _]; | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants - --> $DIR/in-signature.rs:39:25 + --> $DIR/in-signature.rs:38:25 | LL | const ARR: Bar; | ^ not allowed in type signatures error[E0121]: the placeholder `_` is not allowed within types on item signatures for associated constants - --> $DIR/in-signature.rs:43:20 + --> $DIR/in-signature.rs:42:20 | LL | const ARR: Bar<_, _>; | ^ ^ not allowed in type signatures diff --git a/tests/ui/const-generics/generic_arg_infer/infer-arg-test.rs b/tests/ui/const-generics/generic_arg_infer/infer-arg-test.rs index c254b4ee09d2e..dcdcd250ea926 100644 --- a/tests/ui/const-generics/generic_arg_infer/infer-arg-test.rs +++ b/tests/ui/const-generics/generic_arg_infer/infer-arg-test.rs @@ -1,5 +1,3 @@ -#![feature(generic_arg_infer)] - struct All<'a, T, const N: usize> { v: &'a T, } diff --git a/tests/ui/const-generics/generic_arg_infer/infer-arg-test.stderr b/tests/ui/const-generics/generic_arg_infer/infer-arg-test.stderr index a9c57dbf26a0e..88645f839fc57 100644 --- a/tests/ui/const-generics/generic_arg_infer/infer-arg-test.stderr +++ b/tests/ui/const-generics/generic_arg_infer/infer-arg-test.stderr @@ -1,17 +1,17 @@ error: expected 
identifier, found reserved identifier `_` - --> $DIR/infer-arg-test.rs:7:17 + --> $DIR/infer-arg-test.rs:5:17 | LL | struct BadInfer<_>; | ^ expected identifier, found reserved identifier error: expected identifier, found reserved identifier `_` - --> $DIR/infer-arg-test.rs:13:17 + --> $DIR/infer-arg-test.rs:11:17 | LL | fn bad_infer_fn<_>() {} | ^ expected identifier, found reserved identifier error[E0392]: type parameter `_` is never used - --> $DIR/infer-arg-test.rs:7:17 + --> $DIR/infer-arg-test.rs:5:17 | LL | struct BadInfer<_>; | ^ unused type parameter @@ -20,7 +20,7 @@ LL | struct BadInfer<_>; = help: if you intended `_` to be a const parameter, use `const _: /* Type */` instead error[E0107]: struct takes 2 generic arguments but 3 generic arguments were supplied - --> $DIR/infer-arg-test.rs:18:10 + --> $DIR/infer-arg-test.rs:16:10 | LL | let a: All<_, _, _>; | ^^^ --- help: remove the unnecessary generic argument @@ -28,7 +28,7 @@ LL | let a: All<_, _, _>; | expected 2 generic arguments | note: struct defined here, with 2 generic parameters: `T`, `N` - --> $DIR/infer-arg-test.rs:3:8 + --> $DIR/infer-arg-test.rs:1:8 | LL | struct All<'a, T, const N: usize> { | ^^^ - -------------- diff --git a/tests/ui/const-generics/generic_arg_infer/infer_arg_and_const_arg.rs b/tests/ui/const-generics/generic_arg_infer/infer_arg_and_const_arg.rs index 35b3fe4f4359b..e82250444d93f 100644 --- a/tests/ui/const-generics/generic_arg_infer/infer_arg_and_const_arg.rs +++ b/tests/ui/const-generics/generic_arg_infer/infer_arg_and_const_arg.rs @@ -1,5 +1,4 @@ //@ check-pass -#![feature(generic_arg_infer)] struct Foo; struct Bar; diff --git a/tests/ui/const-generics/generic_arg_infer/issue-91614.rs b/tests/ui/const-generics/generic_arg_infer/issue-91614.rs index a386b1e5c2bf5..4a3d85499da81 100644 --- a/tests/ui/const-generics/generic_arg_infer/issue-91614.rs +++ b/tests/ui/const-generics/generic_arg_infer/issue-91614.rs @@ -1,5 +1,4 @@ #![feature(portable_simd)] -#![feature(generic_arg_infer)] use std::simd::Mask; fn main() { diff --git a/tests/ui/const-generics/generic_arg_infer/issue-91614.stderr b/tests/ui/const-generics/generic_arg_infer/issue-91614.stderr index b07e1f29d0d90..164bcc7111ca6 100644 --- a/tests/ui/const-generics/generic_arg_infer/issue-91614.stderr +++ b/tests/ui/const-generics/generic_arg_infer/issue-91614.stderr @@ -1,5 +1,5 @@ error[E0284]: type annotations needed for `Mask<_, _>` - --> $DIR/issue-91614.rs:6:9 + --> $DIR/issue-91614.rs:5:9 | LL | let y = Mask::<_, _>::splat(false); | ^ ------------ type must be known at this point @@ -12,7 +12,7 @@ LL | let y: Mask<_, N> = Mask::<_, _>::splat(false); | ++++++++++++ error[E0284]: type annotations needed for `Mask<_, _>` - --> $DIR/issue-91614.rs:6:9 + --> $DIR/issue-91614.rs:5:9 | LL | let y = Mask::<_, _>::splat(false); | ^ -------------------------- type must be known at this point diff --git a/tests/ui/const-generics/generic_arg_infer/parend_infer.nogate.stderr b/tests/ui/const-generics/generic_arg_infer/parend_infer.nogate.stderr deleted file mode 100644 index d0a5da9676df4..0000000000000 --- a/tests/ui/const-generics/generic_arg_infer/parend_infer.nogate.stderr +++ /dev/null @@ -1,53 +0,0 @@ -error[E0658]: const arguments cannot yet be inferred with `_` - --> $DIR/parend_infer.rs:24:16 - | -LL | let c: Foo<_> = Foo::<1>; - | ^ - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if 
it is out of date - -error[E0658]: const arguments cannot yet be inferred with `_` - --> $DIR/parend_infer.rs:26:16 - | -LL | let c: Foo<(_)> = Foo::<1>; - | ^^^ - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: const arguments cannot yet be inferred with `_` - --> $DIR/parend_infer.rs:28:16 - | -LL | let c: Foo<(((_)))> = Foo::<1>; - | ^^^^^^^ - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: using `_` for array lengths is unstable - --> $DIR/parend_infer.rs:17:17 - | -LL | let b: [u8; (_)] = [1; (((((_)))))]; - | ^^^ - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: using `_` for array lengths is unstable - --> $DIR/parend_infer.rs:17:28 - | -LL | let b: [u8; (_)] = [1; (((((_)))))]; - | ^^^^^^^^^^^ - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error: aborting due to 5 previous errors - -For more information about this error, try `rustc --explain E0658`. diff --git a/tests/ui/const-generics/generic_arg_infer/parend_infer.rs b/tests/ui/const-generics/generic_arg_infer/parend_infer.rs index 3dc27a702de40..9d7df8016cb1e 100644 --- a/tests/ui/const-generics/generic_arg_infer/parend_infer.rs +++ b/tests/ui/const-generics/generic_arg_infer/parend_infer.rs @@ -1,6 +1,4 @@ -//@[gate] check-pass -//@ revisions: gate nogate -#![cfg_attr(gate, feature(generic_arg_infer))] +//@ check-pass struct Foo; @@ -15,16 +13,11 @@ fn main() { // AST Exprs similarly preserve parens for pretty printing reasons. 
#[rustfmt::skip] let b: [u8; (_)] = [1; (((((_)))))]; - //[nogate]~^ error: using `_` for array lengths is unstable - //[nogate]~| error: using `_` for array lengths is unstable let b: [u8; 2] = b; // This is the same case as AST types as the parser doesn't distinguish between const // and type args when they share syntax let c: Foo<_> = Foo::<1>; - //[nogate]~^ error: const arguments cannot yet be inferred with `_` let c: Foo<(_)> = Foo::<1>; - //[nogate]~^ error: const arguments cannot yet be inferred with `_` let c: Foo<(((_)))> = Foo::<1>; - //[nogate]~^ error: const arguments cannot yet be inferred with `_` } diff --git a/tests/ui/const-generics/generic_const_exprs/const_kind_expr/wf_obligation.rs b/tests/ui/const-generics/generic_const_exprs/const_kind_expr/wf_obligation.rs index 6093fc70b1696..a82ea45b1232c 100644 --- a/tests/ui/const-generics/generic_const_exprs/const_kind_expr/wf_obligation.rs +++ b/tests/ui/const-generics/generic_const_exprs/const_kind_expr/wf_obligation.rs @@ -1,4 +1,4 @@ -#![feature(generic_const_exprs, generic_arg_infer)] +#![feature(generic_const_exprs)] #![allow(incomplete_features)] // minimized repro for #105205 diff --git a/tests/ui/const-generics/generic_const_exprs/poly-const-uneval-ice-106423.rs b/tests/ui/const-generics/generic_const_exprs/poly-const-uneval-ice-106423.rs index ed5ba32b62105..eca0404fcd01b 100644 --- a/tests/ui/const-generics/generic_const_exprs/poly-const-uneval-ice-106423.rs +++ b/tests/ui/const-generics/generic_const_exprs/poly-const-uneval-ice-106423.rs @@ -3,7 +3,7 @@ //@ edition:2021 //@ check-pass -#![feature(generic_const_exprs, generic_arg_infer)] +#![feature(generic_const_exprs)] #![allow(incomplete_features)] #![allow(unused)] @@ -41,17 +41,13 @@ where DigitalFilter::Ba(zpk) } -pub fn zpk2tf_st( - _z: &Arr, - _p: &Arr, -) -> BaFormatFilter<{ N + 1 }> +pub fn zpk2tf_st(_z: &Arr, _p: &Arr) -> BaFormatFilter<{ N + 1 }> where [(); N + 1]: Sized, { BaFormatFilter {} } - fn main() { - iirfilter_st_copy::<4, 2>([10., 50.,]); + iirfilter_st_copy::<4, 2>([10., 50.]); } diff --git a/tests/ui/const-generics/generic_const_parameter_types/bad_inference.rs b/tests/ui/const-generics/generic_const_parameter_types/bad_inference.rs index de2e687e870c4..9748c14d655df 100644 --- a/tests/ui/const-generics/generic_const_parameter_types/bad_inference.rs +++ b/tests/ui/const-generics/generic_const_parameter_types/bad_inference.rs @@ -1,9 +1,4 @@ -#![feature( - adt_const_params, - unsized_const_params, - generic_const_parameter_types, - generic_arg_infer -)] +#![feature(adt_const_params, unsized_const_params, generic_const_parameter_types)] #![allow(incomplete_features)] use std::marker::ConstParamTy_; diff --git a/tests/ui/const-generics/generic_const_parameter_types/bad_inference.stderr b/tests/ui/const-generics/generic_const_parameter_types/bad_inference.stderr index 1ac67fe622b55..4652187b9ce18 100644 --- a/tests/ui/const-generics/generic_const_parameter_types/bad_inference.stderr +++ b/tests/ui/const-generics/generic_const_parameter_types/bad_inference.stderr @@ -1,11 +1,11 @@ error: anonymous constants with inferred types are not yet supported - --> $DIR/bad_inference.rs:17:25 + --> $DIR/bad_inference.rs:12:25 | LL | let a = foo::<_, _, { [12_u8; 2] }>(); | ^^^^^^^^^^^^^^ error: anonymous constants with inferred types are not yet supported - --> $DIR/bad_inference.rs:21:34 + --> $DIR/bad_inference.rs:16:34 | LL | let b: [u8; 2] = foo::<_, _, { [12; _] }>(); | ^^^^^^^^^^^ diff --git 
a/tests/ui/const-generics/generic_const_parameter_types/evaluate_const_parameter_in_mir.rs b/tests/ui/const-generics/generic_const_parameter_types/evaluate_const_parameter_in_mir.rs index 910deb6632d73..9f3ab1be2502e 100644 --- a/tests/ui/const-generics/generic_const_parameter_types/evaluate_const_parameter_in_mir.rs +++ b/tests/ui/const-generics/generic_const_parameter_types/evaluate_const_parameter_in_mir.rs @@ -1,11 +1,6 @@ //@ check-pass -#![feature( - adt_const_params, - unsized_const_params, - generic_const_parameter_types, - generic_arg_infer -)] +#![feature(adt_const_params, unsized_const_params, generic_const_parameter_types)] #![allow(incomplete_features)] use std::marker::ConstParamTy_; diff --git a/tests/ui/const-generics/generic_const_parameter_types/inferred_from_arg.rs b/tests/ui/const-generics/generic_const_parameter_types/inferred_from_arg.rs index d655fc174ee5c..a4e9aa54c01bb 100644 --- a/tests/ui/const-generics/generic_const_parameter_types/inferred_from_arg.rs +++ b/tests/ui/const-generics/generic_const_parameter_types/inferred_from_arg.rs @@ -1,6 +1,6 @@ //@ check-pass -#![feature(adt_const_params, generic_arg_infer, generic_const_parameter_types)] +#![feature(adt_const_params, generic_const_parameter_types)] #![expect(incomplete_features)] struct Bar; diff --git a/tests/ui/const-generics/generic_const_parameter_types/unrelated_inferred_arg.rs b/tests/ui/const-generics/generic_const_parameter_types/unrelated_inferred_arg.rs index b389e12884ea5..80117a27a23d2 100644 --- a/tests/ui/const-generics/generic_const_parameter_types/unrelated_inferred_arg.rs +++ b/tests/ui/const-generics/generic_const_parameter_types/unrelated_inferred_arg.rs @@ -1,11 +1,6 @@ //@ check-pass -#![feature( - adt_const_params, - unsized_const_params, - generic_const_parameter_types, - generic_arg_infer -)] +#![feature(adt_const_params, unsized_const_params, generic_const_parameter_types)] #![allow(incomplete_features)] use std::marker::ConstParamTy_; diff --git a/tests/ui/const-generics/issues/issue-62878.min.stderr b/tests/ui/const-generics/issues/issue-62878.min.stderr index d3d4fa4387109..d7ca0e1e2db59 100644 --- a/tests/ui/const-generics/issues/issue-62878.min.stderr +++ b/tests/ui/const-generics/issues/issue-62878.min.stderr @@ -16,17 +16,6 @@ help: add `#![feature(adt_const_params)]` to the crate attributes to enable more LL + #![feature(adt_const_params)] | -error[E0658]: const arguments cannot yet be inferred with `_` - --> $DIR/issue-62878.rs:10:11 - | -LL | foo::<_, { [1] }>(); - | ^ - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors -Some errors have detailed explanations: E0658, E0770. -For more information about an error, try `rustc --explain E0658`. +For more information about this error, try `rustc --explain E0770`. 
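The "const arguments cannot yet be inferred with `_`" error dropped from `issue-62878.min.stderr` corresponds to `_` now being usable for const generic arguments without a feature gate. A small sketch of the accepted pattern, assuming this change has landed; the function name here is illustrative rather than taken from the tests:

```rust
fn first_len<const N: usize>(_data: [u32; N]) -> usize {
    N
}

fn main() {
    // `_` asks the compiler to infer the const argument `N` from the
    // argument's length, mirroring what was already possible for type arguments.
    let n = first_len::<_>([1, 2, 3]);
    assert_eq!(n, 3);
}
```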
diff --git a/tests/ui/const-generics/issues/issue-62878.rs b/tests/ui/const-generics/issues/issue-62878.rs index c80b46ddbc440..0c143b34ce91e 100644 --- a/tests/ui/const-generics/issues/issue-62878.rs +++ b/tests/ui/const-generics/issues/issue-62878.rs @@ -1,5 +1,5 @@ //@ revisions: full min -#![cfg_attr(full, feature(adt_const_params, generic_arg_infer))] +#![cfg_attr(full, feature(adt_const_params))] #![cfg_attr(full, allow(incomplete_features))] fn foo() {} @@ -8,5 +8,4 @@ fn foo() {} fn main() { foo::<_, { [1] }>(); - //[min]~^ ERROR: const arguments cannot yet be inferred with `_` } diff --git a/tests/ui/const-generics/min_const_generics/inferred_const.rs b/tests/ui/const-generics/min_const_generics/inferred_const.rs index 0256ef732a346..4a4fb417ab1dd 100644 --- a/tests/ui/const-generics/min_const_generics/inferred_const.rs +++ b/tests/ui/const-generics/min_const_generics/inferred_const.rs @@ -1,4 +1,3 @@ -#![feature(generic_arg_infer)] //@ run-pass fn foo(_data: [u32; N]) -> [u32; K] { diff --git a/tests/ui/feature-gates/feature-gate-generic_arg_infer.normal.stderr b/tests/ui/feature-gates/feature-gate-generic_arg_infer.normal.stderr deleted file mode 100644 index 73e6988b09cc8..0000000000000 --- a/tests/ui/feature-gates/feature-gate-generic_arg_infer.normal.stderr +++ /dev/null @@ -1,33 +0,0 @@ -error[E0658]: using `_` for array lengths is unstable - --> $DIR/feature-gate-generic_arg_infer.rs:13:18 - | -LL | let _y: [u8; _] = [0; 3]; - | ^ help: consider specifying the array length: `3` - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: const arguments cannot yet be inferred with `_` - --> $DIR/feature-gate-generic_arg_infer.rs:18:20 - | -LL | let _x = foo::<_>([1, 2]); - | ^ - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error[E0658]: using `_` for array lengths is unstable - --> $DIR/feature-gate-generic_arg_infer.rs:11:27 - | -LL | let _x: [u8; 3] = [0; _]; - | ^ - | - = note: see issue #85077 for more information - = help: add `#![feature(generic_arg_infer)]` to the crate attributes to enable - = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date - -error: aborting due to 3 previous errors - -For more information about this error, try `rustc --explain E0658`. 
diff --git a/tests/ui/feature-gates/feature-gate-generic_arg_infer.rs b/tests/ui/feature-gates/feature-gate-generic_arg_infer.rs deleted file mode 100644 index 147978b0557a6..0000000000000 --- a/tests/ui/feature-gates/feature-gate-generic_arg_infer.rs +++ /dev/null @@ -1,21 +0,0 @@ -//@ [feature] run-pass -//@ revisions: normal feature - -#![cfg_attr(feature, feature(generic_arg_infer))] - -fn foo(_: [u8; N]) -> [u8; N] { - [0; N] -} - -fn bar() { - let _x: [u8; 3] = [0; _]; - //[normal]~^ ERROR: using `_` for array lengths is unstable - let _y: [u8; _] = [0; 3]; - //[normal]~^ ERROR: using `_` for array lengths is unstable -} - -fn main() { - let _x = foo::<_>([1, 2]); - //[normal]~^ ERROR: const arguments cannot yet be inferred with `_` - bar(); -} diff --git a/tests/ui/lang-items/lang-item-generic-requirements.rs b/tests/ui/lang-items/lang-item-generic-requirements.rs index 168e22ad7db68..2f80567d9e71a 100644 --- a/tests/ui/lang-items/lang-item-generic-requirements.rs +++ b/tests/ui/lang-items/lang-item-generic-requirements.rs @@ -56,14 +56,14 @@ fn ice() { let arr = [0; 5]; //~^ ERROR requires `copy` lang_item let _ = arr[2]; - //~^ ERROR cannot index into a value of type `[{integer}; 5]` + //~^ ERROR: cannot index into a value of type `[{integer}; 5]` // Use phantomdata let _ = MyPhantomData::<(), i32>; // Use Foo let _: () = Foo; - //~^ ERROR mismatched types + //~^ ERROR: mismatched types } // use `start` diff --git a/tests/ui/object-lifetime/object-lifetime-default-inferred.rs b/tests/ui/object-lifetime/object-lifetime-default-inferred.rs index 5abe09e272920..1ab821764de08 100644 --- a/tests/ui/object-lifetime/object-lifetime-default-inferred.rs +++ b/tests/ui/object-lifetime/object-lifetime-default-inferred.rs @@ -4,7 +4,6 @@ #![allow(dead_code)] -#![feature(generic_arg_infer)] trait Test { fn foo(&self) { } diff --git a/tests/ui/parser/issues/issue-14303-fncall.rs b/tests/ui/parser/issues/issue-14303-fncall.rs index 8f7fbec947067..1e8ef6af3fa93 100644 --- a/tests/ui/parser/issues/issue-14303-fncall.rs +++ b/tests/ui/parser/issues/issue-14303-fncall.rs @@ -1,7 +1,5 @@ -//@ revisions: full generic_arg // can't run rustfix because it doesn't handle multipart suggestions correctly // we need the above to avoid ast borrowck failure in recovered code -#![cfg_attr(generic_arg, feature(generic_arg_infer))] struct S<'a, T> { a: &'a T, @@ -10,8 +8,7 @@ struct S<'a, T> { fn foo<'a, 'b>(start: &'a usize, end: &'a usize) { let _x = (*start..*end).map(|x| S { a: start, b: end }).collect::>>(); - //[generic_arg]~^ ERROR placeholder provided when a lifetime was expected - //[full]~^^ ERROR placeholder provided when a lifetime was expected + //~^ ERROR placeholder provided when a lifetime was expected } fn main() {} diff --git a/tests/ui/parser/issues/issue-14303-fncall.stderr b/tests/ui/parser/issues/issue-14303-fncall.stderr new file mode 100644 index 0000000000000..c42a23fa9d3e9 --- /dev/null +++ b/tests/ui/parser/issues/issue-14303-fncall.stderr @@ -0,0 +1,9 @@ +error[E0747]: placeholder provided when a lifetime was expected + --> $DIR/issue-14303-fncall.rs:10:77 + | +LL | let _x = (*start..*end).map(|x| S { a: start, b: end }).collect::>>(); + | ^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0747`. 
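The new `issue-14303-fncall.stderr` records the one case that does not change: `_` still cannot stand in for a lifetime argument and is rejected with `E0747`. A compiling sketch close to that test, with the offending form shown only in a comment (assumed setup, not the test source verbatim):

```rust
struct S<'a, T> {
    a: &'a T,
    b: &'a T,
}

fn foo<'a>(start: &'a usize, end: &'a usize) -> Vec<S<'a, usize>> {
    // Writing `.collect::<Vec<S<_, usize>>>()` here is rejected with E0747,
    // because the first generic parameter of `S` is a lifetime and `_` only
    // infers type and const arguments. Eliding the lifetime compiles fine:
    (*start..*end).map(|_| S { a: start, b: end }).collect::<Vec<S<usize>>>()
}

fn main() {
    let (start, end) = (0, 3);
    let v = foo(&start, &end);
    assert_eq!(v.len(), 3);
    assert_eq!((*v[0].a, *v[0].b), (0, 3));
}
```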
diff --git a/tests/ui/pattern/slice-array-infer.rs b/tests/ui/pattern/slice-array-infer.rs index fdead488ea11d..8d471b31beaf6 100644 --- a/tests/ui/pattern/slice-array-infer.rs +++ b/tests/ui/pattern/slice-array-infer.rs @@ -1,7 +1,6 @@ //@ check-pass #![allow(unused_variables)] -#![feature(generic_arg_infer)] struct Zeroes; impl Into<&'static [usize; 3]> for Zeroes { diff --git a/tests/ui/repeat-expr/copy-check-const-element-uninferred-count.rs b/tests/ui/repeat-expr/copy-check-const-element-uninferred-count.rs index 6115146539c19..722fdae6c99a2 100644 --- a/tests/ui/repeat-expr/copy-check-const-element-uninferred-count.rs +++ b/tests/ui/repeat-expr/copy-check-const-element-uninferred-count.rs @@ -1,5 +1,3 @@ -#![feature(generic_arg_infer)] - // Test when deferring repeat expr copy checks to end of typechecking whether elements // that are const items allow for repeat counts to go uninferred without an error being // emitted if they would later wind up inferred by integer fallback. diff --git a/tests/ui/repeat-expr/copy-check-const-element-uninferred-count.stderr b/tests/ui/repeat-expr/copy-check-const-element-uninferred-count.stderr index 2f52537fa9407..9c9cfefd66383 100644 --- a/tests/ui/repeat-expr/copy-check-const-element-uninferred-count.stderr +++ b/tests/ui/repeat-expr/copy-check-const-element-uninferred-count.stderr @@ -1,5 +1,5 @@ error[E0284]: type annotations needed for `[String; _]` - --> $DIR/copy-check-const-element-uninferred-count.rs:64:9 + --> $DIR/copy-check-const-element-uninferred-count.rs:62:9 | LL | let a = [const { String::new() }; _]; | ^ ---------------------------- type must be known at this point diff --git a/tests/ui/repeat-expr/copy-check-deferred-after-fallback.rs b/tests/ui/repeat-expr/copy-check-deferred-after-fallback.rs index 3f310f07de0fe..a6bd5b299c96a 100644 --- a/tests/ui/repeat-expr/copy-check-deferred-after-fallback.rs +++ b/tests/ui/repeat-expr/copy-check-deferred-after-fallback.rs @@ -1,5 +1,3 @@ -#![feature(generic_arg_infer)] - // Test when deferring repeat expr copy checks to end of typechecking whether they're // checked before integer fallback occurs or not. We accomplish this by having a repeat // count that can only be inferred after integer fallback has occured. This test will diff --git a/tests/ui/repeat-expr/copy-check-deferred-after-fallback.stderr b/tests/ui/repeat-expr/copy-check-deferred-after-fallback.stderr index 103b074dda7c8..0cd7ebe7494ff 100644 --- a/tests/ui/repeat-expr/copy-check-deferred-after-fallback.stderr +++ b/tests/ui/repeat-expr/copy-check-deferred-after-fallback.stderr @@ -1,5 +1,5 @@ error[E0282]: type annotations needed for `[Foo<{integer}>; _]` - --> $DIR/copy-check-deferred-after-fallback.rs:39:9 + --> $DIR/copy-check-deferred-after-fallback.rs:37:9 | LL | let b = [Foo(PhantomData); _]; | ^ ---------------- type must be known at this point diff --git a/tests/ui/repeat-expr/copy-check-deferred-before-fallback.rs b/tests/ui/repeat-expr/copy-check-deferred-before-fallback.rs index 4fbb8f0a00caf..23b13348f5a67 100644 --- a/tests/ui/repeat-expr/copy-check-deferred-before-fallback.rs +++ b/tests/ui/repeat-expr/copy-check-deferred-before-fallback.rs @@ -1,5 +1,4 @@ //@ check-pass -#![feature(generic_arg_infer)] // Test when deferring repeat expr checks to end of typechecking whether they're // checked before integer fallback occurs. 
We accomplish this by having the repeat diff --git a/tests/ui/repeat-expr/copy-check-inference-side-effects.rs b/tests/ui/repeat-expr/copy-check-inference-side-effects.rs index 4e3bfdead26b4..8587f1f9ce91c 100644 --- a/tests/ui/repeat-expr/copy-check-inference-side-effects.rs +++ b/tests/ui/repeat-expr/copy-check-inference-side-effects.rs @@ -1,5 +1,3 @@ -#![feature(generic_arg_infer)] - struct Foo; impl Clone for Foo<1> { diff --git a/tests/ui/repeat-expr/copy-check-inference-side-effects.stderr b/tests/ui/repeat-expr/copy-check-inference-side-effects.stderr index 505beff0f6b2e..bf4ae9b60bb31 100644 --- a/tests/ui/repeat-expr/copy-check-inference-side-effects.stderr +++ b/tests/ui/repeat-expr/copy-check-inference-side-effects.stderr @@ -1,5 +1,5 @@ error[E0282]: type annotations needed for `[Foo<_>; 2]` - --> $DIR/copy-check-inference-side-effects.rs:17:9 + --> $DIR/copy-check-inference-side-effects.rs:15:9 | LL | let a /* : [Foo; 2] */ = [Foo::<_>; 2]; | ^ @@ -13,7 +13,7 @@ LL | let a: [Foo; 2] /* : [Foo; 2] */ = [Foo::<_>; 2]; | +++++++++++++ error[E0282]: type annotations needed for `[String; _]` - --> $DIR/copy-check-inference-side-effects.rs:27:9 + --> $DIR/copy-check-inference-side-effects.rs:25:9 | LL | let b /* : [String; ?x] */ = ["string".to_string(); _]; | ^ -------------------- type must be known at this point diff --git a/tests/ui/repeat-expr/copy-check-when-count-inferred-later.rs b/tests/ui/repeat-expr/copy-check-when-count-inferred-later.rs index b9d123cbefae6..72467e6f32e24 100644 --- a/tests/ui/repeat-expr/copy-check-when-count-inferred-later.rs +++ b/tests/ui/repeat-expr/copy-check-when-count-inferred-later.rs @@ -1,5 +1,3 @@ -#![feature(generic_arg_infer)] - // Test that we enforce repeat expr element types are `Copy` even // when the repeat count is only inferred at a later point in type // checking. 
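The repeat-expression tests in this block keep their behaviour while losing the `generic_arg_infer` gate: a repeat count written as `_` is inferred from the expected type, and the `Copy` requirement on the element is enforced against that inferred count. A compiling sketch of the single-element case from `dont-require-copy-on-infer.rs`, assuming this change has landed:

```rust
fn main() {
    // The repeat count `_` is inferred as 1 from the annotation. A count of
    // exactly 1 simply moves the single value in, so the non-`Copy` element
    // type `String` is accepted here.
    let a: [_; 1] = [String::new(); _];
    assert_eq!(a.len(), 1);

    // By contrast, `let b: [String; 2] = [String::new(); _];` is rejected with
    // E0277 (`String: Copy` not satisfied), as `copy-check-when-count-inferred-later`
    // expects, even though the count is only inferred later from the annotation.
}
```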
diff --git a/tests/ui/repeat-expr/copy-check-when-count-inferred-later.stderr b/tests/ui/repeat-expr/copy-check-when-count-inferred-later.stderr index 1c862f2b606a8..6b8049e77cc51 100644 --- a/tests/ui/repeat-expr/copy-check-when-count-inferred-later.stderr +++ b/tests/ui/repeat-expr/copy-check-when-count-inferred-later.stderr @@ -1,5 +1,5 @@ error[E0277]: the trait bound `String: Copy` is not satisfied - --> $DIR/copy-check-when-count-inferred-later.rs:8:14 + --> $DIR/copy-check-when-count-inferred-later.rs:6:14 | LL | let a = [String::new(); _]; | ^^^^^^^^^^^^^ the trait `Copy` is not implemented for `String` diff --git a/tests/ui/repeat-expr/dont-require-copy-on-infer.rs b/tests/ui/repeat-expr/dont-require-copy-on-infer.rs index e81bf1595be1f..ad0e4bd2be0ce 100644 --- a/tests/ui/repeat-expr/dont-require-copy-on-infer.rs +++ b/tests/ui/repeat-expr/dont-require-copy-on-infer.rs @@ -1,5 +1,4 @@ //@ check-pass -#![feature(generic_arg_infer)] fn main() { let a: [_; 1] = [String::new(); _]; diff --git a/tests/ui/repeat-expr/no-conservative-copy-impl-requirement.rs b/tests/ui/repeat-expr/no-conservative-copy-impl-requirement.rs index eb70df62996fb..df79ad51b4215 100644 --- a/tests/ui/repeat-expr/no-conservative-copy-impl-requirement.rs +++ b/tests/ui/repeat-expr/no-conservative-copy-impl-requirement.rs @@ -1,5 +1,3 @@ -#![feature(generic_arg_infer)] - struct Foo; impl Clone for Foo<1> { diff --git a/tests/ui/repeat-expr/no-conservative-copy-impl-requirement.stderr b/tests/ui/repeat-expr/no-conservative-copy-impl-requirement.stderr index 04f8ff33fdab3..bf1e46e4ef829 100644 --- a/tests/ui/repeat-expr/no-conservative-copy-impl-requirement.stderr +++ b/tests/ui/repeat-expr/no-conservative-copy-impl-requirement.stderr @@ -1,5 +1,5 @@ error[E0282]: type annotations needed for `&[Foo<_>; _]` - --> $DIR/no-conservative-copy-impl-requirement.rs:17:9 + --> $DIR/no-conservative-copy-impl-requirement.rs:15:9 | LL | let x = &[Foo::<_>; _]; | ^ -------- type must be known at this point diff --git a/tests/ui/simd/const-err-trumps-simd-err.rs b/tests/ui/simd/const-err-trumps-simd-err.rs index 8d9870855f807..33f0abb06f3ea 100644 --- a/tests/ui/simd/const-err-trumps-simd-err.rs +++ b/tests/ui/simd/const-err-trumps-simd-err.rs @@ -4,7 +4,6 @@ //! Make sure that monomorphization-time const errors from `static_assert` take priority over the //! error from simd_extract. Basically this checks that if a const fails to evaluate in some //! function, we don't bother codegen'ing the function. -#![feature(generic_arg_infer)] #![feature(core_intrinsics)] #![feature(repr_simd)] diff --git a/tests/ui/simd/const-err-trumps-simd-err.stderr b/tests/ui/simd/const-err-trumps-simd-err.stderr index d4ba54a28da7e..93d1fce637f2f 100644 --- a/tests/ui/simd/const-err-trumps-simd-err.stderr +++ b/tests/ui/simd/const-err-trumps-simd-err.stderr @@ -1,17 +1,17 @@ error[E0080]: evaluation panicked: assertion failed: LANE < 4 - --> $DIR/const-err-trumps-simd-err.rs:18:13 + --> $DIR/const-err-trumps-simd-err.rs:17:13 | LL | const { assert!(LANE < 4); } // the error should be here... | ^^^^^^^^^^^^^^^^^ evaluation of `get_elem::<4>::{constant#0}` failed here note: erroneous constant encountered - --> $DIR/const-err-trumps-simd-err.rs:18:5 + --> $DIR/const-err-trumps-simd-err.rs:17:5 | LL | const { assert!(LANE < 4); } // the error should be here... 
   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 note: the above error was encountered while instantiating `fn get_elem::<4>`
-  --> $DIR/const-err-trumps-simd-err.rs:24:5
+  --> $DIR/const-err-trumps-simd-err.rs:23:5
   |
 LL |     get_elem::<4>(int8x4_t([0, 0, 0, 0]));
   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/tests/ui/span/issue-42234-unknown-receiver-type.rs b/tests/ui/span/issue-42234-unknown-receiver-type.rs
index 53d1e3eed820e..8f7bbf0fe5e09 100644
--- a/tests/ui/span/issue-42234-unknown-receiver-type.rs
+++ b/tests/ui/span/issue-42234-unknown-receiver-type.rs
@@ -1,6 +1,3 @@
-//@ revisions: full generic_arg
-#![cfg_attr(generic_arg, feature(generic_arg_infer))]
-
 // When the type of a method call's receiver is unknown, the span should point
 // to the receiver (and not the entire call, as was previously the case before
 // the fix of which this tests).
diff --git a/tests/ui/span/issue-42234-unknown-receiver-type.stderr b/tests/ui/span/issue-42234-unknown-receiver-type.stderr
new file mode 100644
index 0000000000000..10308ec07da5a
--- /dev/null
+++ b/tests/ui/span/issue-42234-unknown-receiver-type.stderr
@@ -0,0 +1,23 @@
+error[E0282]: type annotations needed
+  --> $DIR/issue-42234-unknown-receiver-type.rs:6:24
+   |
+LL |     let x: Option<_> = None;
+   |                        ^^^^ cannot infer type of the type parameter `T` declared on the enum `Option`
+LL |     x.unwrap().method_that_could_exist_on_some_type();
+   |       ---------- type must be known at this point
+   |
+help: consider specifying the generic argument
+   |
+LL |     let x: Option<_> = None::;
+   |                            +++++
+
+error[E0282]: type annotations needed
+  --> $DIR/issue-42234-unknown-receiver-type.rs:12:10
+   |
+LL |         .sum::<_>()
+   |          ^^^ cannot infer type of the type parameter `S` declared on the method `sum`
+   |
+
+error: aborting due to 2 previous errors
+
+For more information about this error, try `rustc --explain E0282`.
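With the `generic_arg` revision removed, issue-42234-unknown-receiver-type has a single checked-in .stderr. Roughly, the situation it covers looks like the sketch below, a hedged reconstruction from the diagnostics above rather than the test file itself (`method_that_could_exist_on_some_type` is only a placeholder name taken from the error output); it is expected to fail to compile with the two E0282 errors shown:

    fn unknown_receiver_type() {
        // `T` in `Option<T>` is never constrained, and the method call forces the
        // receiver type to be known, so the error points at the receiver.
        let x: Option<_> = None; //~ ERROR type annotations needed
        x.unwrap().method_that_could_exist_on_some_type();
    }

    fn unconstrained_sum(data: &[u32]) {
        // `S` in `Iterator::sum::<S>` is likewise never constrained.
        let _ = data.iter().map(|n| n + 1).sum::<_>(); //~ ERROR type annotations needed
    }

    fn main() {}
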
diff --git a/tests/ui/traits/const-traits/tilde-const-and-const-params.rs b/tests/ui/traits/const-traits/tilde-const-and-const-params.rs
index 706c77b6200a8..428223d92c01e 100644
--- a/tests/ui/traits/const-traits/tilde-const-and-const-params.rs
+++ b/tests/ui/traits/const-traits/tilde-const-and-const-params.rs
@@ -1,5 +1,4 @@
 #![feature(const_trait_impl)]
-#![feature(generic_arg_infer)]
 #![feature(generic_const_exprs)]
 #![allow(incomplete_features)]
diff --git a/tests/ui/traits/const-traits/tilde-const-and-const-params.stderr b/tests/ui/traits/const-traits/tilde-const-and-const-params.stderr
index f77d63db054a0..95e684bd0c47d 100644
--- a/tests/ui/traits/const-traits/tilde-const-and-const-params.stderr
+++ b/tests/ui/traits/const-traits/tilde-const-and-const-params.stderr
@@ -1,35 +1,35 @@
 error: `~const` is not allowed here
-  --> $DIR/tilde-const-and-const-params.rs:9:15
+  --> $DIR/tilde-const-and-const-params.rs:8:15
   |
 LL | fn add(self) -> Foo<{ A::add(N) }> {
   |        ^^^^^^
   |
 note: this function is not `const`, so it cannot have `~const` trait bounds
-  --> $DIR/tilde-const-and-const-params.rs:9:8
+  --> $DIR/tilde-const-and-const-params.rs:8:8
   |
 LL | fn add(self) -> Foo<{ A::add(N) }> {
   |    ^^^

 error: `~const` is not allowed here
-  --> $DIR/tilde-const-and-const-params.rs:27:11
+  --> $DIR/tilde-const-and-const-params.rs:26:11
   |
 LL | fn bar(_: Foo) -> Foo<{ A::add(N) }> {
   |           ^^^^^^
   |
 note: this function is not `const`, so it cannot have `~const` trait bounds
-  --> $DIR/tilde-const-and-const-params.rs:27:4
+  --> $DIR/tilde-const-and-const-params.rs:26:4
   |
 LL | fn bar(_: Foo) -> Foo<{ A::add(N) }> {
   |    ^^^

 error[E0277]: the trait bound `A: const Add42` is not satisfied
-  --> $DIR/tilde-const-and-const-params.rs:27:61
+  --> $DIR/tilde-const-and-const-params.rs:26:61
   |
 LL | fn bar(_: Foo) -> Foo<{ A::add(N) }> {
   |                                                             ^

 error[E0277]: the trait bound `A: const Add42` is not satisfied
-  --> $DIR/tilde-const-and-const-params.rs:9:44
+  --> $DIR/tilde-const-and-const-params.rs:8:44
   |
 LL | fn add(self) -> Foo<{ A::add(N) }> {
   |                                            ^