rust-lang
diff --git a/‎src/librustc/session/config.rs
+15-3 b/‎src/librustc/session/config.rs
+15-3
diff --git a/‎src/librustc/session/mod.rs
+82-21 b/‎src/librustc/session/mod.rs
+82-21
diff --git a/‎src/librustc_trans/back/write.rs
+3-2 b/‎src/librustc_trans/back/write.rs
+3-2
diff --git a/‎src/librustc_trans/base.rs
+1-1 b/‎src/librustc_trans/base.rs
+1-1
diff --git a/‎src/libstd/sys_common/backtrace.rs
+20-2 b/‎src/libstd/sys_common/backtrace.rs
+20-2
diff --git a/‎src/rustllvm/PassWrapper.cpp
+23-87 b/‎src/rustllvm/PassWrapper.cpp
+23-87
diff --git a/‎src/test/run-fail/mir_trans_no_landing_pads.rs
+1-1 b/‎src/test/run-fail/mir_trans_no_landing_pads.rs
+1-1
diff --git a/‎src/test/run-fail/mir_trans_no_landing_pads_diverging.rs
+1-1 b/‎src/test/run-fail/mir_trans_no_landing_pads_diverging.rs
+1-1
diff --git a/‎src/test/run-pass-fulldeps/auxiliary/issue_16723_multiple_items_syntax_ext.rs ‎src/test/run-pass-fulldeps/auxiliary/issue-16723.rs b/‎src/test/run-pass-fulldeps/auxiliary/issue_16723_multiple_items_syntax_ext.rs ‎src/test/run-pass-fulldeps/auxiliary/issue-16723.rs
diff --git a/‎src/test/run-pass-fulldeps/auxiliary/plugin_crate_outlive_expansion_phase.rs ‎src/test/run-pass-fulldeps/auxiliary/outlive-expansion-phase.rs b/‎src/test/run-pass-fulldeps/auxiliary/plugin_crate_outlive_expansion_phase.rs ‎src/test/run-pass-fulldeps/auxiliary/outlive-expansion-phase.rs
@@ -383,8 +383,13 @@ top_level_options!(
         // try to not rely on this too much.
         actually_rustdoc: bool [TRACKED],
 
-        // Number of object files/codegen units to produce on the backend
+        // Specifications of codegen units / ThinLTO which are forced as a
+        // result of parsing command line options. These are not necessarily
+        // what rustc was invoked with, but are massaged a bit to agree with
+        // options like `--emit llvm-ir`, which rustc's defaults are often
+        // incompatible with.
         cli_forced_codegen_units: Option<usize> [UNTRACKED],
+        cli_forced_thinlto: Option<bool> [UNTRACKED],
     }
 );
 
@@ -566,6 +571,7 @@ pub fn basic_options() -> Options {
         debug_assertions: true,
         actually_rustdoc: false,
         cli_forced_codegen_units: None,
+        cli_forced_thinlto: None,
     }
 }
 
@@ -1163,7 +1169,7 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
                  "run the non-lexical lifetimes MIR pass"),
     trans_time_graph: bool = (false, parse_bool, [UNTRACKED],
         "generate a graphical HTML report of time spent in trans and LLVM"),
-    thinlto: bool = (false, parse_bool, [TRACKED],
+    thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
         "enable ThinLTO when possible"),
     inline_in_all_cgus: Option<bool> = (None, parse_opt_bool, [TRACKED],
         "control whether #[inline] functions are in all cgus"),
@@ -1599,6 +1605,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
 
     let mut cg = build_codegen_options(matches, error_format);
     let mut codegen_units = cg.codegen_units;
+    let mut thinlto = None;
 
     // Issue #30063: if user requests llvm-related output to one
     // particular path, disable codegen-units.
@@ -1620,9 +1627,13 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
                     }
                     early_warn(error_format, "resetting to default -C codegen-units=1");
                     codegen_units = Some(1);
+                    thinlto = Some(false);
                 }
             }
-            _ => codegen_units = Some(1),
+            _ => {
+                codegen_units = Some(1);
+                thinlto = Some(false);
+            }
         }
     }
 
@@ -1832,6 +1843,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
         debug_assertions,
         actually_rustdoc: false,
         cli_forced_codegen_units: codegen_units,
+        cli_forced_thinlto: thinlto,
     },
     cfg)
 }
 
@@ -656,30 +656,91 @@ impl Session {
             return n as usize
         }
 
+        // Why is 16 codegen units the default all the time?
+        //
+        // The main reason for enabling multiple codegen units by default is to
+        // leverage the ability for the trans backend to do translation and
+        // codegen in parallel. This allows us, especially for large crates, to
+        // make good use of all available resources on the machine once we've
+        // hit that stage of compilation. Large crates especially then often
+        // take a long time in trans/codegen and this helps us amortize that
+        // cost.
+        //
+        // Note that a high number here doesn't mean that we'll be spawning a
+        // large number of threads in parallel. The backend of rustc contains
+        // global rate limiting through the `jobserver` crate so we'll never
+        // overload the system with too much work, but rather we'll only be
+        // optimizing when we're otherwise cooperating with other instances of
+        // rustc.
+        //
+        // Rather a high number here means that we should be able to keep a lot
+        // of idle cpus busy. By ensuring that no codegen unit takes *too* long
+        // to build we'll be guaranteed that all cpus will finish pretty closely
+        // to one another and we should make relatively optimal use of system
+        // resources.
+        //
+        // Note that the main cost of codegen units is that it prevents LLVM
+        // from inlining across codegen units. Users in general don't have a lot
+        // of control over how codegen units are split up so it's our job in the
+        // compiler to ensure that undue performance isn't lost when using
+        // codegen units (aka we can't require everyone to slap `#[inline]` on
+        // everything).
+        //
+        // If we're compiling at `-O0` then the number doesn't really matter too
+        // much because performance doesn't matter and inlining is ok to lose.
+        // In debug mode we just want to try to guarantee that no cpu is stuck
+        // doing work that could otherwise be farmed to others.
+        //
+        // In release mode, however (O1 and above) performance does indeed
+        // matter! To recover the performance lost to missed inlining we'll be
+        // enabling ThinLTO by default (the function for which is just below).
+        // This will ensure that we recover any inlining wins we otherwise lost
+        // through codegen unit partitioning.
+        //
+        // ---
+        //
+        // Ok that's a lot of words but the basic tl;dr is that we want a high
+        // number here -- but not too high. Additionally we're "safe" to have it
+        // always at the same number at all optimization levels.
+        //
+        // As a result 16 was chosen here! Mostly because it was a power of 2
+        // and most benchmarks agreed it was roughly a local optimum. Not very
+        // scientific.
         match self.opts.optimize {
-            // If we're compiling at `-O0` then default to 16 codegen units.
-            // The number here shouldn't matter too too much as debug mode
-            // builds don't rely on performance at all, meaning that lost
-            // opportunities for inlining through multiple codegen units is
-            // a non-issue.
-            //
-            // Note that the high number here doesn't mean that we'll be
-            // spawning a large number of threads in parallel. The backend
-            // of rustc contains global rate limiting through the
-            // `jobserver` crate so we'll never overload the system with too
-            // much work, but rather we'll only be optimizing when we're
-            // otherwise cooperating with other instances of rustc.
-            //
-            // Rather the high number here means that we should be able to
-            // keep a lot of idle cpus busy. By ensuring that no codegen
-            // unit takes *too* long to build we'll be guaranteed that all
-            // cpus will finish pretty closely to one another and we should
-            // make relatively optimal use of system resources
             config::OptLevel::No => 16,
+            _ => 1, // FIXME(#46346) this should be 16
+        }
+    }
 
-            // All other optimization levels default use one codegen unit,
-            // the historical default in Rust for a Long Time.
-            _ => 1,
+    /// Returns whether ThinLTO is enabled for this compilation.
+    pub fn thinlto(&self) -> bool {
+        // If processing command line options determined that we're incompatible
+        // with ThinLTO (e.g. `-C lto --emit llvm-ir`) then return that option.
+        if let Some(enabled) = self.opts.cli_forced_thinlto {
+            return enabled
+        }
+
+        // If explicitly specified, use that with the next highest priority
+        if let Some(enabled) = self.opts.debugging_opts.thinlto {
+            return enabled
+        }
+
+        // If there's only one codegen unit and LTO isn't enabled then there's
+        // no need for ThinLTO so just return false.
+        if self.codegen_units() == 1 && !self.lto() {
+            return false
+        }
+
+        // Right now ThinLTO isn't compatible with incremental compilation.
+        if self.opts.incremental.is_some() {
+            return false
+        }
+
+        // Now we're in "defaults" territory. By default we enable ThinLTO for
+        // optimized compiles (anything greater than O0).
+        match self.opts.optimize {
+            config::OptLevel::No => false,
+            _ => true,
         }
     }
 }
 
@@ -1402,8 +1402,9 @@ fn start_executing_work(tcx: TyCtxt,
         // for doesn't require full LTO. Some targets require one LLVM module
         // (they effectively don't have a linker) so it's up to us to use LTO to
         // link everything together.
-        thinlto: sess.opts.debugging_opts.thinlto &&
-            !sess.target.target.options.requires_lto,
+        thinlto: sess.thinlto() &&
+            !sess.target.target.options.requires_lto &&
+            unsafe { llvm::LLVMRustThinLTOAvailable() },
 
         no_landing_pads: sess.no_landing_pads(),
         save_temps: sess.opts.cg.save_temps,
 
@@ -704,7 +704,7 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
 
     check_for_rustc_errors_attr(tcx);
 
-    if tcx.sess.opts.debugging_opts.thinlto {
+    if let Some(true) = tcx.sess.opts.debugging_opts.thinlto {
         if unsafe { !llvm::LLVMRustThinLTOAvailable() } {
             tcx.sess.fatal("this compiler's LLVM does not support ThinLTO");
         }
 
@@ -252,8 +252,26 @@ fn output_fileline(w: &mut Write,
 // Note that this demangler isn't quite as fancy as it could be. We have lots
 // of other information in our symbols like hashes, version, type information,
 // etc. Additionally, this doesn't handle glue symbols at all.
-pub fn demangle(writer: &mut Write, s: &str, format: PrintFormat) -> io::Result<()> {
-    // First validate the symbol. If it doesn't look like anything we're
+pub fn demangle(writer: &mut Write, mut s: &str, format: PrintFormat) -> io::Result<()> {
+    // During ThinLTO LLVM may import and rename internal symbols, so strip out
+    // those endings first as they're one of the last manglings applied to
+    // symbol names.
+    let llvm = ".llvm.";
+    if let Some(i) = s.find(llvm) {
+        let candidate = &s[i + llvm.len()..];
+        let all_hex = candidate.chars().all(|c| {
+            match c {
+                'A' ... 'F' | '0' ... '9' => true,
+                _ => false,
+            }
+        });
+
+        if all_hex {
+            s = &s[..i];
+        }
+    }
+
+    // Validate the symbol. If it doesn't look like anything we're
     // expecting, we just print it literally. Note that we must handle non-rust
     // symbols because we could have any function in the backtrace.
     let mut valid = true;
 
@@ -11,6 +11,7 @@
 #include <stdio.h>
 
 #include <vector>
+#include <set>
 
 #include "rustllvm.h"
 
@@ -885,86 +886,6 @@ getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) {
   return FirstDefForLinker->get();
 }
 
-// This is a helper function we added that isn't present in LLVM's source.
-//
-// The way LTO works in Rust is that we typically have a number of symbols that
-// we know ahead of time need to be preserved. We want to ensure that ThinLTO
-// doesn't accidentally internalize any of these and otherwise is always
-// ready to keep them linking correctly.
-//
-// This function will recursively walk the `GUID` provided and all of its
-// references, as specified in the `Index`. In other words, we're taking a
-// `GUID` as input, adding it to `Preserved`, and then taking all `GUID`
-// items that the input references and recursing.
-static void
-addPreservedGUID(const ModuleSummaryIndex &Index,
-                 DenseSet<GlobalValue::GUID> &Preserved,
-                 GlobalValue::GUID GUID) {
-  if (Preserved.count(GUID))
-    return;
-  Preserved.insert(GUID);
-
-#if LLVM_VERSION_GE(5, 0)
-  auto Info = Index.getValueInfo(GUID);
-  if (!Info) {
-    return;
-  }
-  for (auto &Summary : Info.getSummaryList()) {
-    for (auto &Ref : Summary->refs()) {
-      addPreservedGUID(Index, Preserved, Ref.getGUID());
-    }
-
-    GlobalValueSummary *GVSummary = Summary.get();
-    if (isa<FunctionSummary>(GVSummary)) {
-      auto *FS = cast<FunctionSummary>(GVSummary);
-      for (auto &Call: FS->calls()) {
-        addPreservedGUID(Index, Preserved, Call.first.getGUID());
-      }
-      for (auto &GUID: FS->type_tests()) {
-        addPreservedGUID(Index, Preserved, GUID);
-      }
-    }
-    if (isa<AliasSummary>(GVSummary)) {
-      auto *AS = cast<AliasSummary>(GVSummary);
-      auto GUID = AS->getAliasee().getOriginalName();
-      addPreservedGUID(Index, Preserved, GUID);
-    }
-  }
-#else
-  auto SummaryList = Index.findGlobalValueSummaryList(GUID);
-  if (SummaryList == Index.end())
-    return;
-  for (auto &Summary : SummaryList->second) {
-    for (auto &Ref : Summary->refs()) {
-      if (Ref.isGUID()) {
-        addPreservedGUID(Index, Preserved, Ref.getGUID());
-      } else {
-        auto Value = Ref.getValue();
-        addPreservedGUID(Index, Preserved, Value->getGUID());
-      }
-    }
-
-    if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
-      for (auto &Call: FS->calls()) {
-        if (Call.first.isGUID()) {
-          addPreservedGUID(Index, Preserved, Call.first.getGUID());
-        } else {
-          auto Value = Call.first.getValue();
-          addPreservedGUID(Index, Preserved, Value->getGUID());
-        }
-      }
-      for (auto &GUID: FS->type_tests()) {
-        addPreservedGUID(Index, Preserved, GUID);
-      }
-    }
-    if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
-      auto GUID = AS->getAliasee().getOriginalName();
-      addPreservedGUID(Index, Preserved, GUID);
-    }
-  }
-#endif
-}
-
 // The main entry point for creating the global ThinLTO analysis. The structure
 // here is basically the same as before threads are spawned in the `run`
 // function of `lib/LTO/ThinLTOCodeGenerator.cpp`.
@@ -1004,12 +925,10 @@ LLVMRustCreateThinLTOData(LLVMRustThinLTOModule *modules,
   Ret->Index.collectDefinedGVSummariesPerModule(Ret->ModuleToDefinedGVSummaries);
 
   // Convert the preserved symbols set from string to GUID, this is then needed
-  // for internalization. We use `addPreservedGUID` to include any transitively
-  // used symbol as well.
+  // for internalization.
   for (int i = 0; i < num_symbols; i++) {
-    addPreservedGUID(Ret->Index,
-                     Ret->GUIDPreservedSymbols,
-                     GlobalValue::getGUID(preserved_symbols[i]));
+    auto GUID = GlobalValue::getGUID(preserved_symbols[i]);
+    Ret->GUIDPreservedSymbols.insert(GUID);
   }
 
   // Collect the import/export lists for all modules from the call-graph in the
@@ -1038,7 +957,8 @@ LLVMRustCreateThinLTOData(LLVMRustThinLTOModule *modules,
   // Resolve LinkOnce/Weak symbols, this has to be computed early be cause it
   // impacts the caching.
   //
-  // This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp`
+  // This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp`, with some of it
+  // lifted from `lib/LTO/LTO.cpp` as well.
   StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
   DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
   for (auto &I : Ret->Index) {
@@ -1062,11 +982,27 @@ LLVMRustCreateThinLTOData(LLVMRustThinLTOModule *modules,
     ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
   };
   thinLTOResolveWeakForLinkerInIndex(Ret->Index, isPrevailing, recordNewLinkage);
+
+  // Here we calculate an `ExportedGUIDs` set for use in the `isExported`
+  // callback below. That callback dictates the linkage for all summaries in
+  // the index, and all we want is to ensure that dead symbols are
+  // internalized. Everything else that already has external linkage stays
+  // external, and anything internal stays internal.
+  std::set<GlobalValue::GUID> ExportedGUIDs;
+  for (auto &List : Ret->Index) {
+    for (auto &GVS: List.second) {
+      if (!GlobalValue::isExternalLinkage(GVS->linkage()))
+        continue;
+      auto GUID = GVS->getOriginalName();
+      if (!DeadSymbols.count(GUID))
+        ExportedGUIDs.insert(GUID);
+    }
+  }
   auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
     const auto &ExportList = Ret->ExportLists.find(ModuleIdentifier);
     return (ExportList != Ret->ExportLists.end() &&
       ExportList->second.count(GUID)) ||
-      Ret->GUIDPreservedSymbols.count(GUID);
+      ExportedGUIDs.count(GUID);
   };
   thinLTOInternalizeAndPromoteInIndex(Ret->Index, isExported);
 
 
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-// compile-flags: -Z no-landing-pads
+// compile-flags: -Z no-landing-pads -C codegen-units=1
 // error-pattern:converging_fn called
 use std::io::{self, Write};
 
 
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-// compile-flags: -Z no-landing-pads
+// compile-flags: -Z no-landing-pads -C codegen-units=1
 // error-pattern:diverging_fn called
 use std::io::{self, Write};
Original file line number	Diff line number	Diff line change
`@@ -704,7 +704,7 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,`
`704`	`704`
`705`	`705`	`check_for_rustc_errors_attr(tcx);`
`706`	`706`
`707`		`- if tcx.sess.opts.debugging_opts.thinlto {`
	`707`	`+ if let Some(true) = tcx.sess.opts.debugging_opts.thinlto {`
`708`	`708`	`if unsafe { !llvm::LLVMRustThinLTOAvailable() } {`
`709`	`709`	`tcx.sess.fatal("this compiler's LLVM does not support ThinLTO");`
`710`	`710`	`}`