@@ -656,30 +656,91 @@ impl Session {
656
656
return n as usize
657
657
}
658
658
659
+ // Why is 16 codegen units the default all the time?
660
+ //
661
+ // The main reason for enabling multiple codegen units by default is to
662
+ // leverage the ability for the trans backend to do translation and
663
+ // codegen in parallel. This allows us, especially for large crates, to
664
+ // make good use of all available resources on the machine once we've
665
+ // hit that stage of compilation. Large crates especially then often
666
+ // take a long time in trans/codegen and this helps us amortize that
667
+ // cost.
668
+ //
669
+ // Note that a high number here doesn't mean that we'll be spawning a
670
+ // large number of threads in parallel. The backend of rustc contains
671
+ // global rate limiting through the `jobserver` crate so we'll never
672
+ // overload the system with too much work, but rather we'll only be
673
+ // optimizing when we're otherwise cooperating with other instances of
674
+ // rustc.
675
+ //
676
+ // Rather a high number here means that we should be able to keep a lot
677
+ // of idle cpus busy. By ensuring that no codegen unit takes *too* long
678
+ // to build we'll be guaranteed that all cpus will finish pretty closely
679
+ // to one another and we should make relatively optimal use of system
680
+ // resources.
681
+ //
682
+ // Note that the main cost of codegen units is that it prevents LLVM
683
+ // from inlining across codegen units. Users in general don't have a lot
684
+ // of control over how codegen units are split up so it's our job in the
685
+ // compiler to ensure that undue performance isn't lost when using
686
+ // codegen units (i.e. we can't require everyone to slap `#[inline]` on
687
+ // everything).
688
+ //
689
+ // If we're compiling at `-O0` then the number doesn't really matter too
690
+ // much because performance doesn't matter and inlining is ok to lose.
691
+ // In debug mode we just want to try to guarantee that no cpu is stuck
692
+ // doing work that could otherwise be farmed to others.
693
+ //
694
+ // In release mode, however (O1 and above) performance does indeed
695
+ // matter! To recover the loss in performance due to inlining we'll be
696
+ // enabling ThinLTO by default (the function for which is just below).
697
+ // This will ensure that we recover any inlining wins we otherwise lost
698
+ // through codegen unit partitioning.
699
+ //
700
+ // ---
701
+ //
702
+ // Ok that's a lot of words but the basic tl;dr is that we want a high
703
+ // number here -- but not too high. Additionally we're "safe" to have it
704
+ // always at the same number at all optimization levels.
705
+ //
706
+ // As a result 16 was chosen here! Mostly because it was a power of 2
707
+ // and most benchmarks agreed it was roughly a local optimum. Not very
708
+ // scientific.
659
709
match self . opts . optimize {
660
- // If we're compiling at `-O0` then default to 16 codegen units.
661
- // The number here shouldn't matter too too much as debug mode
662
- // builds don't rely on performance at all, meaning that lost
663
- // opportunities for inlining through multiple codegen units is
664
- // a non-issue.
665
- //
666
- // Note that the high number here doesn't mean that we'll be
667
- // spawning a large number of threads in parallel. The backend
668
- // of rustc contains global rate limiting through the
669
- // `jobserver` crate so we'll never overload the system with too
670
- // much work, but rather we'll only be optimizing when we're
671
- // otherwise cooperating with other instances of rustc.
672
- //
673
- // Rather the high number here means that we should be able to
674
- // keep a lot of idle cpus busy. By ensuring that no codegen
675
- // unit takes *too* long to build we'll be guaranteed that all
676
- // cpus will finish pretty closely to one another and we should
677
- // make relatively optimal use of system resources
678
710
config:: OptLevel :: No => 16 ,
711
+ _ => 1 , // FIXME(#46346) this should be 16
712
+ }
713
+ }
679
714
680
- // All other optimization levels default use one codegen unit,
681
- // the historical default in Rust for a Long Time.
682
- _ => 1 ,
715
+ /// Returns whether ThinLTO is enabled for this compilation
716
+ pub fn thinlto ( & self ) -> bool {
717
+ // If processing command line options determined that we're incompatible
718
+ // with ThinLTO (e.g. `-C lto --emit llvm-ir`) then return that option.
719
+ if let Some ( enabled) = self . opts . cli_forced_thinlto {
720
+ return enabled
721
+ }
722
+
723
+ // If explicitly specified, use that with the next highest priority
724
+ if let Some ( enabled) = self . opts . debugging_opts . thinlto {
725
+ return enabled
726
+ }
727
+
728
+ // If there's only one codegen unit and LTO isn't enabled then there's
729
+ // no need for ThinLTO so just return false.
730
+ if self . codegen_units ( ) == 1 && !self . lto ( ) {
731
+ return false
732
+ }
733
+
734
+ // Right now ThinLTO isn't compatible with incremental compilation.
735
+ if self . opts . incremental . is_some ( ) {
736
+ return false
737
+ }
738
+
739
+ // Now we're in "defaults" territory. By default we enable ThinLTO for
740
+ // optimized compiles (anything greater than O0).
741
+ match self . opts . optimize {
742
+ config:: OptLevel :: No => false ,
743
+ _ => true ,
683
744
}
684
745
}
685
746
}
0 commit comments