Skip to content

Commit 855f6d1

Browse files
committed
rustc: Prepare to enable ThinLTO by default
This commit prepares to enable ThinLTO and multiple codegen units in release mode by default. We've still got a debuginfo bug or two to sort out before actually turning it on by default.
1 parent 7df4683 commit 855f6d1

File tree

8 files changed

+124
-32
lines changed

8 files changed

+124
-32
lines changed

src/librustc/session/config.rs

+15-3
Original file line numberDiff line numberDiff line change
@@ -383,8 +383,13 @@ top_level_options!(
383383
// try to not rely on this too much.
384384
actually_rustdoc: bool [TRACKED],
385385

386-
// Number of object files/codegen units to produce on the backend
386+
// Specifications of codegen units / ThinLTO which are forced as a
387+
// result of parsing command line options. These are not necessarily
388+
// what rustc was invoked with, but massaged a bit to agree with
389+
// options like `--emit llvm-ir` which they're often incompatible with
390+
// if we otherwise use the defaults of rustc.
387391
cli_forced_codegen_units: Option<usize> [UNTRACKED],
392+
cli_forced_thinlto: Option<bool> [UNTRACKED],
388393
}
389394
);
390395

@@ -566,6 +571,7 @@ pub fn basic_options() -> Options {
566571
debug_assertions: true,
567572
actually_rustdoc: false,
568573
cli_forced_codegen_units: None,
574+
cli_forced_thinlto: None,
569575
}
570576
}
571577

@@ -1165,7 +1171,7 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
11651171
"run the non-lexical lifetimes MIR pass"),
11661172
trans_time_graph: bool = (false, parse_bool, [UNTRACKED],
11671173
"generate a graphical HTML report of time spent in trans and LLVM"),
1168-
thinlto: bool = (false, parse_bool, [TRACKED],
1174+
thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
11691175
"enable ThinLTO when possible"),
11701176
inline_in_all_cgus: Option<bool> = (None, parse_opt_bool, [TRACKED],
11711177
"control whether #[inline] functions are in all cgus"),
@@ -1601,6 +1607,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
16011607

16021608
let mut cg = build_codegen_options(matches, error_format);
16031609
let mut codegen_units = cg.codegen_units;
1610+
let mut thinlto = None;
16041611

16051612
// Issue #30063: if user requests llvm-related output to one
16061613
// particular path, disable codegen-units.
@@ -1622,9 +1629,13 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
16221629
}
16231630
early_warn(error_format, "resetting to default -C codegen-units=1");
16241631
codegen_units = Some(1);
1632+
thinlto = Some(false);
16251633
}
16261634
}
1627-
_ => codegen_units = Some(1),
1635+
_ => {
1636+
codegen_units = Some(1);
1637+
thinlto = Some(false);
1638+
}
16281639
}
16291640
}
16301641

@@ -1834,6 +1845,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
18341845
debug_assertions,
18351846
actually_rustdoc: false,
18361847
cli_forced_codegen_units: codegen_units,
1848+
cli_forced_thinlto: thinlto,
18371849
},
18381850
cfg)
18391851
}

src/librustc/session/mod.rs

+82-21
Original file line numberDiff line numberDiff line change
@@ -656,30 +656,91 @@ impl Session {
656656
return n as usize
657657
}
658658

659+
// Why is 16 codegen units the default all the time?
660+
//
661+
// The main reason for enabling multiple codegen units by default is to
662+
// leverage the ability for the trans backend to do translation and
663+
// codegen in parallel. This allows us, especially for large crates, to
664+
// make good use of all available resources on the machine once we've
665+
// hit that stage of compilation. Large crates especially then often
666+
// take a long time in trans/codegen and this helps us amortize that
667+
// cost.
668+
//
669+
// Note that a high number here doesn't mean that we'll be spawning a
670+
// large number of threads in parallel. The backend of rustc contains
671+
// global rate limiting through the `jobserver` crate so we'll never
672+
// overload the system with too much work, but rather we'll only be
673+
// optimizing when we're otherwise cooperating with other instances of
674+
// rustc.
675+
//
676+
// Rather a high number here means that we should be able to keep a lot
677+
// of idle cpus busy. By ensuring that no codegen unit takes *too* long
678+
// to build we'll be guaranteed that all cpus will finish pretty closely
679+
// to one another and we should make relatively optimal use of system
680+
// resources.
681+
//
682+
// Note that the main cost of codegen units is that they prevent LLVM
683+
// from inlining across codegen units. Users in general don't have a lot
684+
// of control over how codegen units are split up so it's our job in the
685+
// compiler to ensure that undue performance isn't lost when using
686+
// codegen units (aka we can't require everyone to slap `#[inline]` on
687+
// everything).
688+
//
689+
// If we're compiling at `-O0` then the number doesn't really matter too
690+
// much because performance doesn't matter and inlining is ok to lose.
691+
// In debug mode we just want to try to guarantee that no cpu is stuck
692+
// doing work that could otherwise be farmed to others.
693+
//
694+
// In release mode, however (O1 and above) performance does indeed
695+
// matter! To recover the performance lost to missed inlining we'll be
696+
// enabling ThinLTO by default (the function for which is just below).
697+
// This will ensure that we recover any inlining wins we otherwise lost
698+
// through codegen unit partitioning.
699+
//
700+
// ---
701+
//
702+
// Ok that's a lot of words but the basic tl;dr is that we want a high
703+
// number here -- but not too high. Additionally we're "safe" to have it
704+
// always at the same number at all optimization levels.
705+
//
706+
// As a result 16 was chosen here! Mostly because it was a power of 2
707+
// and most benchmarks agreed it was roughly a local optimum. Not very
708+
// scientific.
659709
match self.opts.optimize {
660-
// If we're compiling at `-O0` then default to 16 codegen units.
661-
// The number here shouldn't matter too too much as debug mode
662-
// builds don't rely on performance at all, meaning that lost
663-
// opportunities for inlining through multiple codegen units is
664-
// a non-issue.
665-
//
666-
// Note that the high number here doesn't mean that we'll be
667-
// spawning a large number of threads in parallel. The backend
668-
// of rustc contains global rate limiting through the
669-
// `jobserver` crate so we'll never overload the system with too
670-
// much work, but rather we'll only be optimizing when we're
671-
// otherwise cooperating with other instances of rustc.
672-
//
673-
// Rather the high number here means that we should be able to
674-
// keep a lot of idle cpus busy. By ensuring that no codegen
675-
// unit takes *too* long to build we'll be guaranteed that all
676-
// cpus will finish pretty closely to one another and we should
677-
// make relatively optimal use of system resources
678710
config::OptLevel::No => 16,
711+
_ => 1, // FIXME(#46346) this should be 16
712+
}
713+
}
679714

680-
// All other optimization levels default use one codegen unit,
681-
// the historical default in Rust for a Long Time.
682-
_ => 1,
715+
/// Returns whether ThinLTO is enabled for this compilation
716+
pub fn thinlto(&self) -> bool {
717+
// If processing command line options determined that we're incompatible
718+
// with ThinLTO (e.g. `-C lto --emit llvm-ir`) then return that option.
719+
if let Some(enabled) = self.opts.cli_forced_thinlto {
720+
return enabled
721+
}
722+
723+
// If explicitly specified, use that with the next highest priority
724+
if let Some(enabled) = self.opts.debugging_opts.thinlto {
725+
return enabled
726+
}
727+
728+
// If there's only one codegen unit and LTO isn't enabled then there's
729+
// no need for ThinLTO so just return false.
730+
if self.codegen_units() == 1 && !self.lto() {
731+
return false
732+
}
733+
734+
// Right now ThinLTO isn't compatible with incremental compilation.
735+
if self.opts.incremental.is_some() {
736+
return false
737+
}
738+
739+
// Now we're in "defaults" territory. By default we enable ThinLTO for
740+
// optimized compiles (anything greater than O0).
741+
match self.opts.optimize {
742+
config::OptLevel::No => false,
743+
_ => true,
683744
}
684745
}
685746
}

src/librustc_trans/back/write.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -1402,8 +1402,9 @@ fn start_executing_work(tcx: TyCtxt,
14021402
// for doesn't require full LTO. Some targets require one LLVM module
14031403
// (they effectively don't have a linker) so it's up to us to use LTO to
14041404
// link everything together.
1405-
thinlto: sess.opts.debugging_opts.thinlto &&
1406-
!sess.target.target.options.requires_lto,
1405+
thinlto: sess.thinlto() &&
1406+
!sess.target.target.options.requires_lto &&
1407+
unsafe { llvm::LLVMRustThinLTOAvailable() },
14071408

14081409
no_landing_pads: sess.no_landing_pads(),
14091410
save_temps: sess.opts.cg.save_temps,

src/librustc_trans/base.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,7 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
706706

707707
check_for_rustc_errors_attr(tcx);
708708

709-
if tcx.sess.opts.debugging_opts.thinlto {
709+
if let Some(true) = tcx.sess.opts.debugging_opts.thinlto {
710710
if unsafe { !llvm::LLVMRustThinLTOAvailable() } {
711711
tcx.sess.fatal("this compiler's LLVM does not support ThinLTO");
712712
}

src/libstd/sys_common/backtrace.rs

+20-2
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,26 @@ fn output_fileline(w: &mut Write,
252252
// Note that this demangler isn't quite as fancy as it could be. We have lots
253253
// of other information in our symbols like hashes, version, type information,
254254
// etc. Additionally, this doesn't handle glue symbols at all.
255-
pub fn demangle(writer: &mut Write, s: &str, format: PrintFormat) -> io::Result<()> {
256-
// First validate the symbol. If it doesn't look like anything we're
255+
pub fn demangle(writer: &mut Write, mut s: &str, format: PrintFormat) -> io::Result<()> {
256+
// During ThinLTO, LLVM may import and rename internal symbols, so strip out
257+
// those endings first as they're one of the last manglings applied to
258+
// symbol names.
259+
let llvm = ".llvm.";
260+
if let Some(i) = s.find(llvm) {
261+
let candidate = &s[i + llvm.len()..];
262+
let all_hex = candidate.chars().all(|c| {
263+
match c {
264+
'A' ... 'F' | '0' ... '9' => true,
265+
_ => false,
266+
}
267+
});
268+
269+
if all_hex {
270+
s = &s[..i];
271+
}
272+
}
273+
274+
// Validate the symbol. If it doesn't look like anything we're
257275
// expecting, we just print it literally. Note that we must handle non-rust
258276
// symbols because we could have any function in the backtrace.
259277
let mut valid = true;

src/test/run-fail/mir_trans_no_landing_pads.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
// compile-flags: -Z no-landing-pads
11+
// compile-flags: -Z no-landing-pads -C codegen-units=1
1212
// error-pattern:converging_fn called
1313
use std::io::{self, Write};
1414

src/test/run-fail/mir_trans_no_landing_pads_diverging.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
// compile-flags: -Z no-landing-pads
11+
// compile-flags: -Z no-landing-pads -C codegen-units=1
1212
// error-pattern:diverging_fn called
1313
use std::io::{self, Write};
1414

src/test/run-pass/no-landing-pads.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
// compile-flags: -Z no-landing-pads
11+
// compile-flags: -Z no-landing-pads -C codegen-units=1
1212
// ignore-emscripten no threads support
1313

1414
use std::thread;

0 commit comments

Comments
 (0)