|
1 | | -use crate::core::compiler::{Context, Unit}; |
| 1 | +use crate::core::compiler::{CompileMode, Context, CrateType, Unit}; |
2 | 2 | use crate::core::interning::InternedString; |
3 | 3 | use crate::core::profiles; |
4 | | -use crate::core::TargetKind; |
| 4 | + |
5 | 5 | use crate::util::errors::CargoResult; |
6 | 6 | use std::collections::hash_map::{Entry, HashMap}; |
7 | 7 |
|
8 | 8 | /// Possible ways to run rustc and request various parts of LTO. |
9 | | -#[derive(Copy, Clone, PartialEq, Eq, Hash)] |
| 9 | +/// |
| 10 | +/// Variant | Flag | Object Code | Bitcode |
| 11 | +/// -------------------|------------------------|-------------|-------- |
| 12 | +/// `Run` | `-C lto=foo` | n/a | n/a |
| 13 | +/// `Off` | `-C lto=off` | n/a | n/a |
| 14 | +/// `OnlyBitcode` | `-C linker-plugin-lto` | | ✓ |
| 15 | +/// `ObjectAndBitcode` | | ✓ | ✓ |
| 16 | +/// `OnlyObject` | `-C embed-bitcode=no` | ✓ | |
| 17 | +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] |
10 | 18 | pub enum Lto { |
11 | | - /// LTO is run for this rustc, and it's `-Clto=foo` where `foo` is optional. |
| 19 | + /// LTO is run for this rustc, and it's `-Clto=foo`. If the given value is |
| 20 | + /// None, that corresponds to `-Clto` with no argument, which means do |
| 21 | + /// "fat" LTO. |
12 | 22 | Run(Option<InternedString>), |
13 | 23 |
|
14 | | - /// This rustc invocation only needs to produce bitcode, there's no need to |
15 | | - /// produce object files, so we can pass `-Clinker-plugin-lto` |
| 24 | + /// LTO has been explicitly listed as "off". This means no thin-local-LTO, |
| 25 | + /// no LTO anywhere, I really mean it! |
| 26 | + Off, |
| 27 | + |
| 28 | + /// This rustc invocation only needs to produce bitcode (it is *only* used |
| 29 | + /// for LTO), there's no need to produce object files, so we can pass |
| 30 | + /// `-Clinker-plugin-lto` |
16 | 31 | OnlyBitcode, |
17 | 32 |
|
18 | 33 | /// This rustc invocation needs to embed bitcode in object files. This means |
19 | 34 | /// that object files may be used for a normal link, and the crate may be |
20 | 35 | /// loaded for LTO later, so both are required. |
21 | | - EmbedBitcode, |
| 36 | + ObjectAndBitcode, |
22 | 37 |
|
23 | | - /// Nothing related to LTO is required of this compilation. |
24 | | - None, |
| 38 | + /// This should not include bitcode. This is primarily to reduce disk |
| 39 | + /// space usage. |
| 40 | + OnlyObject, |
25 | 41 | } |
26 | 42 |
|
27 | 43 | pub fn generate(cx: &mut Context<'_, '_>) -> CargoResult<()> { |
28 | 44 | let mut map = HashMap::new(); |
29 | 45 | for unit in cx.bcx.roots.iter() { |
30 | | - calculate(cx, &mut map, unit, Lto::None)?; |
| 46 | + let root_lto = match unit.profile.lto { |
| 47 | + // LTO not requested, no need for bitcode. |
| 48 | + profiles::Lto::Bool(false) | profiles::Lto::Off => Lto::OnlyObject, |
| 49 | + _ => { |
| 50 | + let crate_types = unit.target.rustc_crate_types(); |
| 51 | + if unit.target.for_host() { |
| 52 | + Lto::OnlyObject |
| 53 | + } else if needs_object(&crate_types) { |
| 54 | + lto_when_needs_object(&crate_types) |
| 55 | + } else { |
| 56 | + // This may or may not participate in LTO, let's start |
| 57 | + // with the minimum requirements. This may be expanded in |
| 58 | + // `calculate` below if necessary. |
| 59 | + Lto::OnlyBitcode |
| 60 | + } |
| 61 | + } |
| 62 | + }; |
| 63 | + calculate(cx, &mut map, unit, root_lto)?; |
31 | 64 | } |
32 | 65 | cx.lto = map; |
33 | 66 | Ok(()) |
34 | 67 | } |
35 | 68 |
|
| 69 | +/// Whether or not any of these crate types need object code. |
| 70 | +fn needs_object(crate_types: &[CrateType]) -> bool { |
| 71 | + crate_types.iter().any(|k| k.can_lto() || k.is_dynamic()) |
| 72 | +} |
| 73 | + |
| 74 | +/// Lto setting to use when this unit needs object code. |
| 75 | +fn lto_when_needs_object(crate_types: &[CrateType]) -> Lto { |
| 76 | + if crate_types.iter().any(CrateType::can_lto) { |
| 77 | + // A mixed rlib/cdylib whose parent is running LTO. This |
| 78 | + // needs both, for bitcode in the rlib (for LTO) and the |
| 79 | + // cdylib requires object code. |
| 80 | + Lto::ObjectAndBitcode |
| 81 | + } else { |
| 82 | + // A dylib whose parent is running LTO. rustc currently |
| 83 | + // doesn't support LTO with dylibs, so bitcode is not |
| 84 | + // needed. |
| 85 | + Lto::OnlyObject |
| 86 | + } |
| 87 | +} |
| 88 | + |
36 | 89 | fn calculate( |
37 | 90 | cx: &Context<'_, '_>, |
38 | 91 | map: &mut HashMap<Unit, Lto>, |
39 | 92 | unit: &Unit, |
40 | | - lto_for_deps: Lto, |
| 93 | + parent_lto: Lto, |
41 | 94 | ) -> CargoResult<()> { |
42 | | - let (lto, lto_for_deps) = if unit.target.for_host() { |
| 95 | + let crate_types = match unit.mode { |
| 96 | + // Note: Doctest ignores LTO, but for now we'll compute it as-if it is |
| 97 | + // a Bin, in case it is ever supported in the future. |
| 98 | + CompileMode::Test | CompileMode::Bench | CompileMode::Doctest => vec![CrateType::Bin], |
| 99 | + // Notes on other modes: |
| 100 | + // - Check: Treat as the underlying type, it doesn't really matter. |
| 101 | + // - Doc: LTO is N/A for the Doc unit itself since rustdoc does not |
| 102 | + // support codegen flags. We still compute the dependencies, which |
| 103 | + // are mostly `Check`. |
| 104 | + // - RunCustomBuild is ignored because it is always "for_host". |
| 105 | + _ => unit.target.rustc_crate_types(), |
| 106 | + }; |
| 107 | + // LTO can only be performed if *all* of the crate types support it. |
| 108 | + // For example, a cdylib/rlib combination won't allow LTO. |
| 109 | + let all_lto_types = crate_types.iter().all(CrateType::can_lto); |
| 110 | + // Compute the LTO based on the profile, and what our parent requires. |
| 111 | + let lto = if unit.target.for_host() { |
43 | 112 | // Disable LTO for host builds since we only really want to perform LTO |
44 | 113 | // for the final binary, and LTO on plugins/build scripts/proc macros is |
45 | 114 | // largely not desired. |
46 | | - (Lto::None, Lto::None) |
47 | | - } else if unit.target.is_linkable() { |
48 | | - // A "linkable" target is one that produces and rlib or dylib in this |
49 | | - // case. In this scenario we cannot pass `-Clto` to the compiler because |
50 | | - // that is an invalid request, this is simply a dependency. What we do, |
51 | | - // however, is respect the request for whatever dependencies need to |
52 | | - // have. |
53 | | - // |
54 | | - // Here if no LTO is requested then we keep it turned off. Otherwise LTO |
55 | | - // is requested in some form, which means ideally we need just what's |
56 | | - // requested, nothing else. It's possible, though, to have libraries |
57 | | - // which are both a cdylib and and rlib, for example, which means that |
58 | | - // object files are getting sent to the linker. That means that we need |
59 | | - // to fully embed bitcode rather than simply generating just bitcode. |
60 | | - let has_non_linkable_lib = match unit.target.kind() { |
61 | | - TargetKind::Lib(kinds) => kinds.iter().any(|k| !k.is_linkable()), |
62 | | - _ => true, |
63 | | - }; |
64 | | - match lto_for_deps { |
65 | | - Lto::None => (Lto::None, Lto::None), |
66 | | - _ if has_non_linkable_lib => (Lto::EmbedBitcode, Lto::EmbedBitcode), |
67 | | - other => (other, other), |
| 115 | + Lto::OnlyObject |
| 116 | + } else if all_lto_types { |
| 117 | + // Note that this ignores the `parent_lto` because this isn't a |
| 118 | + // linkable crate type; this unit is not being embedded in the parent. |
| 119 | + match unit.profile.lto { |
| 120 | + profiles::Lto::Named(s) => Lto::Run(Some(s)), |
| 121 | + profiles::Lto::Off => Lto::Off, |
| 122 | + profiles::Lto::Bool(true) => Lto::Run(None), |
| 123 | + profiles::Lto::Bool(false) => Lto::OnlyObject, |
68 | 124 | } |
69 | 125 | } else { |
70 | | - // Otherwise this target can perform LTO and we're going to read the |
71 | | - // LTO value out of the profile. Note that we ignore `lto_for_deps` |
72 | | - // here because if a unit depends on another unit than can LTO this |
73 | | - // isn't a rustc-level dependency but rather a Cargo-level dependency. |
74 | | - // For example this is an integration test depending on a binary. |
75 | | - match unit.profile.lto { |
76 | | - profiles::Lto::Named(s) => match s.as_str() { |
77 | | - "n" | "no" | "off" => (Lto::Run(Some(s)), Lto::None), |
78 | | - _ => (Lto::Run(Some(s)), Lto::OnlyBitcode), |
79 | | - }, |
80 | | - profiles::Lto::Bool(true) => (Lto::Run(None), Lto::OnlyBitcode), |
81 | | - profiles::Lto::Bool(false) => (Lto::None, Lto::None), |
| 126 | + match (parent_lto, needs_object(&crate_types)) { |
| 127 | + // An rlib whose parent is running LTO, we only need bitcode. |
| 128 | + (Lto::Run(_), false) => Lto::OnlyBitcode, |
| 129 | + // LTO when something needs object code. |
| 130 | + (Lto::Run(_), true) | (Lto::OnlyBitcode, true) => lto_when_needs_object(&crate_types), |
| 131 | + // LTO is disabled, no need for bitcode. |
| 132 | + (Lto::Off, _) => Lto::OnlyObject, |
| 133 | + // If this doesn't have any requirements, or the requirements are |
| 134 | + // already satisfied, then stay with our parent. |
| 135 | + (_, false) | (Lto::OnlyObject, true) | (Lto::ObjectAndBitcode, true) => parent_lto, |
82 | 136 | } |
83 | 137 | }; |
84 | 138 |
|
85 | | - match map.entry(unit.clone()) { |
| 139 | + // Merge the computed LTO. If this unit appears multiple times in the |
| 140 | + // graph, the merge may expand the requirements. |
| 141 | + let merged_lto = match map.entry(unit.clone()) { |
86 | 142 | // If we haven't seen this unit before then insert our value and keep |
87 | 143 | // going. |
88 | | - Entry::Vacant(v) => { |
89 | | - v.insert(lto); |
90 | | - } |
| 144 | + Entry::Vacant(v) => *v.insert(lto), |
91 | 145 |
|
92 | 146 | Entry::Occupied(mut v) => { |
93 | 147 | let result = match (lto, v.get()) { |
| 148 | + // No change in requirements. |
| 149 | + (Lto::OnlyBitcode, Lto::OnlyBitcode) => Lto::OnlyBitcode, |
| 150 | + (Lto::OnlyObject, Lto::OnlyObject) => Lto::OnlyObject, |
| 151 | + |
94 | 152 | // Once we're running LTO we keep running LTO. We should always |
95 | 153 | // calculate the same thing here each iteration because if we |
96 | 154 | // see this twice then it means, for example, two unit tests |
97 | 155 | // depend on a binary, which is normal. |
98 | 156 | (Lto::Run(s), _) | (_, &Lto::Run(s)) => Lto::Run(s), |
99 | 157 |
|
100 | | - // If we calculated the same thing as before then we can bail |
101 | | - // out quickly. |
102 | | - (Lto::OnlyBitcode, Lto::OnlyBitcode) | (Lto::None, Lto::None) => return Ok(()), |
| 158 | + // Off means off! This has the same reasoning as `Lto::Run`. |
| 159 | + (Lto::Off, _) | (_, Lto::Off) => Lto::Off, |
| 160 | + |
| 161 | + // Once a target has requested both, that's the maximal amount |
| 162 | + // of work that can be done, so we just keep doing that work. |
| 163 | + (Lto::ObjectAndBitcode, _) | (_, Lto::ObjectAndBitcode) => Lto::ObjectAndBitcode, |
103 | 164 |
|
| 165 | + // Upgrade so that both requirements can be met. |
| 166 | + // |
104 | 167 | // This is where the trickiness happens. This unit needs |
105 | 168 | // bitcode and the previously calculated value for this unit |
106 | 169 | // says it didn't need bitcode (or vice versa). This means that |
107 | 170 | // we're a shared dependency between some targets which require |
108 | 171 | // LTO and some which don't. This means that instead of being |
109 | 172 | // either only-objects or only-bitcode we have to embed both in |
110 | 173 | // rlibs (used for different compilations), so we switch to |
111 | | - // embedding bitcode. |
112 | | - (Lto::OnlyBitcode, Lto::None) | (Lto::None, Lto::OnlyBitcode) => Lto::EmbedBitcode, |
113 | | - |
114 | | - // Once a target has requested bitcode embedding that's the |
115 | | - // maximal amount of work that can be done, so we just keep |
116 | | - // doing that work. |
117 | | - (Lto::EmbedBitcode, _) | (_, Lto::EmbedBitcode) => Lto::EmbedBitcode, |
| 174 | + // including both. |
| 175 | + (Lto::OnlyObject, Lto::OnlyBitcode) | (Lto::OnlyBitcode, Lto::OnlyObject) => { |
| 176 | + Lto::ObjectAndBitcode |
| 177 | + } |
118 | 178 | }; |
119 | 179 | // No need to recurse if we calculated the same value as before. |
120 | 180 | if result == *v.get() { |
121 | 181 | return Ok(()); |
122 | 182 | } |
123 | 183 | v.insert(result); |
| 184 | + result |
124 | 185 | } |
125 | | - } |
| 186 | + }; |
126 | 187 |
|
127 | 188 | for dep in cx.unit_deps(unit) { |
128 | | - calculate(cx, map, &dep.unit, lto_for_deps)?; |
| 189 | + calculate(cx, map, &dep.unit, merged_lto)?; |
129 | 190 | } |
130 | 191 | Ok(()) |
131 | 192 | } |
0 commit comments