Skip to content

Commit

Permalink
Auto merge of #8349 - ehuss:fix-lto, r=alexcrichton
Browse files Browse the repository at this point in the history
Some LTO fixes.

This reworks the LTO computation a little to address a few issues:

- `cargo build` in a project with both a lib and bin would not engage the optimization introduced in #8192 where the lib *should* be compiled with `-C linker-plugin-lto` (bitcode only). This happened because the old code was starting root units as `Lto::None`. The solution here is to conditionally choose the starting Lto for roots.
- A project with a dylib dependency would fail to build. It was building the dylib with `-C linker-plugin-lto` which is not valid.
- A project with a bin/lib would build the lib differently based on whether or not it was selected. This changes it so that the lib is built the same way in both cases. See `lto::between_builds`, where the lib is now fresh in the second build.
- Tests/benchmarks of a `lib` target will now support LTO.
- Treats example libs a little more consistently as regular libs.

I scattered some comments throughout, hopefully it's not too difficult to follow.

Closes #8337
  • Loading branch information
bors committed Jun 11, 2020
2 parents 1ec223e + 62a61dd commit ee417cb
Show file tree
Hide file tree
Showing 5 changed files with 481 additions and 86 deletions.
11 changes: 11 additions & 0 deletions src/cargo/core/compiler/crate_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ impl CrateType {
}
}

pub fn can_lto(&self) -> bool {
match self {
CrateType::Bin | CrateType::Staticlib | CrateType::Cdylib => true,
CrateType::Lib
| CrateType::Rlib
| CrateType::Dylib
| CrateType::ProcMacro
| CrateType::Other(..) => false,
}
}

pub fn is_linkable(&self) -> bool {
match self {
CrateType::Lib | CrateType::Rlib | CrateType::Dylib | CrateType::ProcMacro => true,
Expand Down
185 changes: 123 additions & 62 deletions src/cargo/core/compiler/lto.rs
Original file line number Diff line number Diff line change
@@ -1,131 +1,192 @@
use crate::core::compiler::{Context, Unit};
use crate::core::compiler::{CompileMode, Context, CrateType, Unit};
use crate::core::interning::InternedString;
use crate::core::profiles;
use crate::core::TargetKind;

use crate::util::errors::CargoResult;
use std::collections::hash_map::{Entry, HashMap};

/// Possible ways to run rustc and request various parts of LTO.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
///
/// Variant            | Flag                   | Object Code | Bitcode
/// -------------------|------------------------|-------------|--------
/// `Run`              | `-C lto=foo`           | n/a         | n/a
/// `Off`              | `-C lto=off`           | n/a         | n/a
/// `OnlyBitcode`      | `-C linker-plugin-lto` |             | ✓
/// `ObjectAndBitcode` |                        | ✓           | ✓
/// `OnlyObject`       | `-C embed-bitcode=no`  | ✓           |
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Lto {
/// LTO is run for this rustc, and it's `-Clto=foo` where `foo` is optional.
/// LTO is run for this rustc, and it's `-Clto=foo`. If the given value is
/// None, that corresponds to `-Clto` with no argument, which means do
/// "fat" LTO.
Run(Option<InternedString>),

/// This rustc invocation only needs to produce bitcode, there's no need to
/// produce object files, so we can pass `-Clinker-plugin-lto`
/// LTO has been explicitly listed as "off". This means no thin-local-LTO,
/// no LTO anywhere, I really mean it!
Off,

/// This rustc invocation only needs to produce bitcode (it is *only* used
/// for LTO), there's no need to produce object files, so we can pass
/// `-Clinker-plugin-lto`
OnlyBitcode,

/// This rustc invocation needs to embed bitcode in object files. This means
/// that object files may be used for a normal link, and the crate may be
/// loaded for LTO later, so both are required.
EmbedBitcode,
ObjectAndBitcode,

/// Nothing related to LTO is required of this compilation.
None,
/// This should not include bitcode. This is primarily to reduce disk
/// space usage.
OnlyObject,
}

/// Builds the unit -> `Lto` map by calling `calculate` on every root unit in
/// `cx.bcx.roots`, then stores the finished map in `cx.lto`.
///
/// Returns an error if any `calculate` call fails.
pub fn generate(cx: &mut Context<'_, '_>) -> CargoResult<()> {
let mut map = HashMap::new();
for unit in cx.bcx.roots.iter() {
calculate(cx, &mut map, unit, Lto::None)?;
// Choose the starting `Lto` for this root from its profile and crate types.
let root_lto = match unit.profile.lto {
// LTO not requested, no need for bitcode.
profiles::Lto::Bool(false) | profiles::Lto::Off => Lto::OnlyObject,
_ => {
let crate_types = unit.target.rustc_crate_types();
if unit.target.for_host() {
Lto::OnlyObject
} else if needs_object(&crate_types) {
lto_when_needs_object(&crate_types)
} else {
// This may or may not participate in LTO, let's start
// with the minimum requirements. This may be expanded in
// `calculate` below if necessary.
Lto::OnlyBitcode
}
}
};
calculate(cx, &mut map, unit, root_lto)?;
}
cx.lto = map;
Ok(())
}

/// Returns `true` as soon as one of the given crate types either can run LTO
/// or is dynamic, i.e. at least one of them needs object code.
fn needs_object(crate_types: &[CrateType]) -> bool {
    for kind in crate_types {
        if kind.can_lto() || kind.is_dynamic() {
            return true;
        }
    }
    false
}

/// Picks the `Lto` value for a unit that is known to need object code.
fn lto_when_needs_object(crate_types: &[CrateType]) -> Lto {
    let any_can_lto = crate_types.iter().any(|kind| kind.can_lto());
    if !any_can_lto {
        // A dylib whose parent is running LTO. rustc currently doesn't
        // support LTO with dylibs, so bitcode is not needed.
        return Lto::OnlyObject;
    }
    // A mixed rlib/cdylib whose parent is running LTO. This needs both:
    // bitcode in the rlib (for LTO), and object code because the cdylib
    // requires it.
    Lto::ObjectAndBitcode
}

/// Records the LTO requirement for `unit` in `map`, merging it with any value
/// already recorded for the same unit, and then visits every unit returned by
/// `cx.unit_deps(unit)`.
///
/// Returns early, without visiting the dependencies, when the merged value is
/// the same as the one already stored for this unit.
fn calculate(
cx: &Context<'_, '_>,
map: &mut HashMap<Unit, Lto>,
unit: &Unit,
lto_for_deps: Lto,
parent_lto: Lto,
) -> CargoResult<()> {
let (lto, lto_for_deps) = if unit.target.for_host() {
let crate_types = match unit.mode {
// Note: Doctest ignores LTO, but for now we'll compute it as-if it is
// a Bin, in case it is ever supported in the future.
CompileMode::Test | CompileMode::Bench | CompileMode::Doctest => vec![CrateType::Bin],
// Notes on other modes:
// - Check: Treat as the underlying type, it doesn't really matter.
// - Doc: LTO is N/A for the Doc unit itself since rustdoc does not
// support codegen flags. We still compute the dependencies, which
// are mostly `Check`.
// - RunCustomBuild is ignored because it is always "for_host".
_ => unit.target.rustc_crate_types(),
};
// LTO can only be performed if *all* of the crate types support it.
// For example, a cdylib/rlib combination won't allow LTO.
let all_lto_types = crate_types.iter().all(CrateType::can_lto);
// Compute the LTO based on the profile, and what our parent requires.
let lto = if unit.target.for_host() {
// Disable LTO for host builds since we only really want to perform LTO
// for the final binary, and LTO on plugins/build scripts/proc macros is
// largely not desired.
(Lto::None, Lto::None)
} else if unit.target.is_linkable() {
// A "linkable" target is one that produces an rlib or dylib in this
// case. In this scenario we cannot pass `-Clto` to the compiler because
// that is an invalid request, this is simply a dependency. What we do,
// however, is respect the request for whatever dependencies need to
// have.
//
// Here if no LTO is requested then we keep it turned off. Otherwise LTO
// is requested in some form, which means ideally we need just what's
// requested, nothing else. It's possible, though, to have libraries
// which are both a cdylib and an rlib, for example, which means that
// object files are getting sent to the linker. That means that we need
// to fully embed bitcode rather than simply generating just bitcode.
let has_non_linkable_lib = match unit.target.kind() {
TargetKind::Lib(kinds) => kinds.iter().any(|k| !k.is_linkable()),
_ => true,
};
match lto_for_deps {
Lto::None => (Lto::None, Lto::None),
_ if has_non_linkable_lib => (Lto::EmbedBitcode, Lto::EmbedBitcode),
other => (other, other),
Lto::OnlyObject
} else if all_lto_types {
// Note that this ignores the `parent_lto` because this isn't a
// linkable crate type; this unit is not being embedded in the parent.
match unit.profile.lto {
profiles::Lto::Named(s) => Lto::Run(Some(s)),
profiles::Lto::Off => Lto::Off,
profiles::Lto::Bool(true) => Lto::Run(None),
profiles::Lto::Bool(false) => Lto::OnlyObject,
}
} else {
// Otherwise this target can perform LTO and we're going to read the
// LTO value out of the profile. Note that we ignore `lto_for_deps`
// here because if a unit depends on another unit that can LTO this
// isn't a rustc-level dependency but rather a Cargo-level dependency.
// For example this is an integration test depending on a binary.
match unit.profile.lto {
profiles::Lto::Named(s) => match s.as_str() {
"n" | "no" | "off" => (Lto::Run(Some(s)), Lto::None),
_ => (Lto::Run(Some(s)), Lto::OnlyBitcode),
},
profiles::Lto::Bool(true) => (Lto::Run(None), Lto::OnlyBitcode),
profiles::Lto::Bool(false) => (Lto::None, Lto::None),
match (parent_lto, needs_object(&crate_types)) {
// An rlib whose parent is running LTO, we only need bitcode.
(Lto::Run(_), false) => Lto::OnlyBitcode,
// LTO when something needs object code.
(Lto::Run(_), true) | (Lto::OnlyBitcode, true) => lto_when_needs_object(&crate_types),
// LTO is disabled, no need for bitcode.
(Lto::Off, _) => Lto::OnlyObject,
// If this doesn't have any requirements, or the requirements are
// already satisfied, then stay with our parent.
(_, false) | (Lto::OnlyObject, true) | (Lto::ObjectAndBitcode, true) => parent_lto,
}
};

match map.entry(unit.clone()) {
// Merge the computed LTO. If this unit appears multiple times in the
// graph, the merge may expand the requirements.
let merged_lto = match map.entry(unit.clone()) {
// If we haven't seen this unit before then insert our value and keep
// going.
Entry::Vacant(v) => {
v.insert(lto);
}
Entry::Vacant(v) => *v.insert(lto),

Entry::Occupied(mut v) => {
let result = match (lto, v.get()) {
// No change in requirements.
(Lto::OnlyBitcode, Lto::OnlyBitcode) => Lto::OnlyBitcode,
(Lto::OnlyObject, Lto::OnlyObject) => Lto::OnlyObject,

// Once we're running LTO we keep running LTO. We should always
// calculate the same thing here each iteration because if we
// see this twice then it means, for example, two unit tests
// depend on a binary, which is normal.
(Lto::Run(s), _) | (_, &Lto::Run(s)) => Lto::Run(s),

// If we calculated the same thing as before then we can bail
// out quickly.
(Lto::OnlyBitcode, Lto::OnlyBitcode) | (Lto::None, Lto::None) => return Ok(()),
// Off means off! This has the same reasoning as `Lto::Run`.
(Lto::Off, _) | (_, Lto::Off) => Lto::Off,

// Once a target has requested both, that's the maximal amount
// of work that can be done, so we just keep doing that work.
(Lto::ObjectAndBitcode, _) | (_, Lto::ObjectAndBitcode) => Lto::ObjectAndBitcode,

// Upgrade so that both requirements can be met.
//
// This is where the trickiness happens. This unit needs
// bitcode and the previously calculated value for this unit
// says it didn't need bitcode (or vice versa). This means that
// we're a shared dependency between some targets which require
// LTO and some which don't. This means that instead of being
// either only-objects or only-bitcode we have to embed both in
// rlibs (used for different compilations), so we switch to
// embedding bitcode.
(Lto::OnlyBitcode, Lto::None) | (Lto::None, Lto::OnlyBitcode) => Lto::EmbedBitcode,

// Once a target has requested bitcode embedding that's the
// maximal amount of work that can be done, so we just keep
// doing that work.
(Lto::EmbedBitcode, _) | (_, Lto::EmbedBitcode) => Lto::EmbedBitcode,
// including both.
(Lto::OnlyObject, Lto::OnlyBitcode) | (Lto::OnlyBitcode, Lto::OnlyObject) => {
Lto::ObjectAndBitcode
}
};
// No need to recurse if we calculated the same value as before.
if result == *v.get() {
return Ok(());
}
v.insert(result);
result
}
}
};

for dep in cx.unit_deps(unit) {
calculate(cx, map, &dep.unit, lto_for_deps)?;
calculate(cx, map, &dep.unit, merged_lto)?;
}
Ok(())
}
8 changes: 6 additions & 2 deletions src/cargo/core/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub use self::job::Freshness;
use self::job::{Job, Work};
use self::job_queue::{JobQueue, JobState};
pub(crate) use self::layout::Layout;
pub use self::lto::Lto;
use self::output_depinfo::output_depinfo;
use self::unit_graph::UnitDep;
pub use crate::core::compiler::unit::{Unit, UnitInterner};
Expand Down Expand Up @@ -787,7 +788,10 @@ fn build_base_args(
lto::Lto::Run(Some(s)) => {
cmd.arg("-C").arg(format!("lto={}", s));
}
lto::Lto::EmbedBitcode => {} // this is rustc's default
lto::Lto::Off => {
cmd.arg("-C").arg("lto=off");
}
lto::Lto::ObjectAndBitcode => {} // this is rustc's default
lto::Lto::OnlyBitcode => {
// Note that this compiler flag, like the one below, is just an
// optimization in terms of build time. If we don't pass it then
Expand All @@ -804,7 +808,7 @@ fn build_base_args(
cmd.arg("-Clinker-plugin-lto");
}
}
lto::Lto::None => {
lto::Lto::OnlyObject => {
if cx
.bcx
.target_data
Expand Down
8 changes: 7 additions & 1 deletion src/cargo/core/profiles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,9 @@ fn merge_profile(profile: &mut Profile, toml: &TomlProfile) {
}
match toml.lto {
Some(StringOrBool::Bool(b)) => profile.lto = Lto::Bool(b),
Some(StringOrBool::String(ref n)) if matches!(n.as_str(), "off" | "n" | "no") => {
profile.lto = Lto::Off
}
Some(StringOrBool::String(ref n)) => profile.lto = Lto::Named(InternedString::new(n)),
None => {}
}
Expand Down Expand Up @@ -747,8 +750,10 @@ impl Profile {
/// The link-time-optimization setting.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash, PartialOrd, Ord)]
pub enum Lto {
/// False = no LTO
/// Explicitly no LTO, disables thin-LTO.
Off,
/// True = "Fat" LTO
/// False = rustc default (no args), currently "thin LTO"
Bool(bool),
/// Named LTO settings like "thin".
Named(InternedString),
Expand All @@ -760,6 +765,7 @@ impl serde::ser::Serialize for Lto {
S: serde::ser::Serializer,
{
match self {
Lto::Off => "off".serialize(s),
Lto::Bool(b) => b.to_string().serialize(s),
Lto::Named(n) => n.serialize(s),
}
Expand Down
Loading

0 comments on commit ee417cb

Please sign in to comment.