Skip to content

Commit

Permalink
Auto merge of #43506 - michaelwoerister:async-llvm, r=alexcrichton
Browse files Browse the repository at this point in the history
Run translation and LLVM in parallel when compiling with multiple CGUs

This is still a work in progress but the bulk of the implementation is done, so I thought it would be good to get it in front of more eyes.

This PR makes the compiler start running LLVM while translation is still in progress, effectively allowing for more parallelism towards the end of the compilation pipeline. It also allows the main thread to switch between translation and running LLVM, which reduces peak memory usage since not all LLVM modules have to be kept in memory until linking. This is especially good for incr. comp. but it works just as well when running with `-Ccodegen-units=N`.

To help with tuning and debugging the work scheduler, the PR adds the `-Ztrans-time-graph` flag, which emits HTML files that show how work packages were scheduled:
![Building regex](https://user-images.githubusercontent.com/1825894/28679272-f6752bd8-72f2-11e7-8a6c-56207855ce95.png)
(red is translation, green is llvm)

One side effect here is that `-Ztime-passes` might show something not quite correct because trans and LLVM are not strictly separated anymore. I plan to have some special handling there that will try to produce useful output.

One open question is how to determine whether the trans-thread should switch to intermediate LLVM processing.

TODO:
- [x] Restore `-Z time-passes` output for LLVM.
- [x] Update documentation, esp. for work package scheduling.
- [x] Tune the scheduling algorithm.

cc @alexcrichton @rust-lang/compiler
  • Loading branch information
bors committed Aug 1, 2017
2 parents c240751 + 6468cad commit e772c28
Show file tree
Hide file tree
Showing 16 changed files with 1,558 additions and 653 deletions.
2 changes: 1 addition & 1 deletion src/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 4 additions & 6 deletions src/librustc/middle/cstore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ pub use self::NativeLibraryKind::*;

// lonely orphan structs and enums looking for a better home

#[derive(Clone, Debug)]
#[derive(Clone, Debug, Copy)]
pub struct LinkMeta {
pub crate_hash: Svh,
}
Expand Down Expand Up @@ -161,15 +161,13 @@ pub struct ExternCrate {
}

pub struct EncodedMetadata {
pub raw_data: Vec<u8>,
pub hashes: EncodedMetadataHashes,
pub raw_data: Vec<u8>
}

impl EncodedMetadata {
pub fn new() -> EncodedMetadata {
EncodedMetadata {
raw_data: Vec::new(),
hashes: EncodedMetadataHashes::new(),
}
}
}
Expand Down Expand Up @@ -294,7 +292,7 @@ pub trait CrateStore {
tcx: TyCtxt<'a, 'tcx, 'tcx>,
link_meta: &LinkMeta,
reachable: &NodeSet)
-> EncodedMetadata;
-> (EncodedMetadata, EncodedMetadataHashes);
fn metadata_encoding_version(&self) -> &[u8];
}

Expand Down Expand Up @@ -424,7 +422,7 @@ impl CrateStore for DummyCrateStore {
tcx: TyCtxt<'a, 'tcx, 'tcx>,
link_meta: &LinkMeta,
reachable: &NodeSet)
-> EncodedMetadata {
-> (EncodedMetadata, EncodedMetadataHashes) {
bug!("encode_metadata")
}
fn metadata_encoding_version(&self) -> &[u8] { bug!("metadata_encoding_version") }
Expand Down
19 changes: 19 additions & 0 deletions src/librustc/session/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
"choose which RELRO level to use"),
nll: bool = (false, parse_bool, [UNTRACKED],
"run the non-lexical lifetimes MIR pass"),
trans_time_graph: bool = (false, parse_bool, [UNTRACKED],
"generate a graphical HTML report of time spent in trans and LLVM"),
}

pub fn default_lib_output() -> CrateType {
Expand Down Expand Up @@ -1498,6 +1500,23 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
early_error(error_format, "Value for codegen units must be a positive nonzero integer");
}

// It's possible that we have `codegen_units > 1` but only one item in
// `trans.modules`. We could theoretically proceed and do LTO in that
// case, but it would be confusing to have the validity of
// `-Z lto -C codegen-units=2` depend on details of the crate being
// compiled, so we complain regardless.
if cg.lto && cg.codegen_units > 1 {
// This case is impossible to handle because LTO expects to be able
// to combine the entire crate and all its dependencies into a
// single compilation unit, but each codegen unit is in a separate
// LLVM context, so they can't easily be combined.
early_error(error_format, "can't perform LTO when using multiple codegen units");
}

if cg.lto && debugging_opts.incremental.is_some() {
early_error(error_format, "can't perform LTO when compiling incrementally");
}

let mut prints = Vec::<PrintRequest>::new();
if cg.target_cpu.as_ref().map_or(false, |s| s == "help") {
prints.push(PrintRequest::TargetCPUs);
Expand Down
32 changes: 27 additions & 5 deletions src/librustc/util/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,32 @@ pub fn time<T, F>(do_it: bool, what: &str, f: F) -> T where
let rv = f();
let dur = start.elapsed();

print_time_passes_entry_internal(what, dur);

TIME_DEPTH.with(|slot| slot.set(old));

rv
}

/// Prints a single time-passes entry for a duration that was measured
/// elsewhere, instead of timing a closure.
///
/// * `do_it` - when `false`, the function returns immediately and prints
///   nothing.
/// * `what` - label forwarded to `print_time_passes_entry_internal`.
/// * `dur` - the already-measured duration forwarded to
///   `print_time_passes_entry_internal`.
pub fn print_time_passes_entry(do_it: bool, what: &str, dur: Duration) {
if !do_it {
return
}

// Bump the thread-local depth while the entry is printed and remember the
// previous value so it can be restored afterwards.
let old = TIME_DEPTH.with(|slot| {
let r = slot.get();
slot.set(r + 1);
r
});

print_time_passes_entry_internal(what, dur);

// Restore the depth that was in effect before this call.
TIME_DEPTH.with(|slot| slot.set(old));
}

fn print_time_passes_entry_internal(what: &str, dur: Duration) {
let indentation = TIME_DEPTH.with(|slot| slot.get());

let mem_string = match get_resident() {
Some(n) => {
let mb = n as f64 / 1_000_000.0;
Expand All @@ -65,14 +91,10 @@ pub fn time<T, F>(do_it: bool, what: &str, f: F) -> T where
None => "".to_owned(),
};
println!("{}time: {}{}\t{}",
repeat(" ").take(old).collect::<String>(),
repeat(" ").take(indentation).collect::<String>(),
duration_to_secs_str(dur),
mem_string,
what);

TIME_DEPTH.with(|slot| slot.set(old));

rv
}

// Hack up our own formatting for the duration to make it easier for scripts
Expand Down
64 changes: 12 additions & 52 deletions src/librustc_driver/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ use rustc_data_structures::stable_hasher::StableHasher;
use rustc_mir as mir;
use rustc::session::{Session, CompileResult};
use rustc::session::CompileIncomplete;
use rustc::session::config::{self, Input, OutputFilenames, OutputType,
OutputTypes};
use rustc::session::config::{self, Input, OutputFilenames, OutputType};
use rustc::session::search_paths::PathKind;
use rustc::lint;
use rustc::middle::{self, dependency_format, stability, reachable};
Expand All @@ -26,7 +25,6 @@ use rustc::ty::{self, TyCtxt, Resolutions, GlobalArenas};
use rustc::traits;
use rustc::util::common::{ErrorReported, time};
use rustc::util::nodemap::NodeSet;
use rustc::util::fs::rename_or_copy_remove;
use rustc_allocator as allocator;
use rustc_borrowck as borrowck;
use rustc_incremental::{self, IncrementalHashesMap};
Expand Down Expand Up @@ -208,7 +206,7 @@ pub fn compile_input(sess: &Session,
println!("Pre-trans");
tcx.print_debug_stats();
}
let trans = phase_4_translate_to_llvm(tcx, analysis, &incremental_hashes_map,
let trans = phase_4_translate_to_llvm(tcx, analysis, incremental_hashes_map,
&outputs);

if log_enabled!(::log::LogLevel::Info) {
Expand All @@ -231,16 +229,14 @@ pub fn compile_input(sess: &Session,
sess.code_stats.borrow().print_type_sizes();
}

let phase5_result = phase_5_run_llvm_passes(sess, &trans, &outputs);
let (phase5_result, trans) = phase_5_run_llvm_passes(sess, trans);

controller_entry_point!(after_llvm,
sess,
CompileState::state_after_llvm(input, sess, outdir, output, &trans),
phase5_result);
phase5_result?;

write::cleanup_llvm(&trans);

phase_6_link_output(sess, &trans, &outputs);

// Now that we won't touch anything in the incremental compilation directory
Expand Down Expand Up @@ -1055,9 +1051,9 @@ pub fn phase_3_run_analysis_passes<'tcx, F, R>(sess: &'tcx Session,
/// be discarded.
pub fn phase_4_translate_to_llvm<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
analysis: ty::CrateAnalysis,
incremental_hashes_map: &IncrementalHashesMap,
incremental_hashes_map: IncrementalHashesMap,
output_filenames: &OutputFilenames)
-> trans::CrateTranslation {
-> write::OngoingCrateTranslation {
let time_passes = tcx.sess.time_passes();

time(time_passes,
Expand All @@ -1067,63 +1063,27 @@ pub fn phase_4_translate_to_llvm<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
let translation =
time(time_passes,
"translation",
move || trans::trans_crate(tcx, analysis, &incremental_hashes_map, output_filenames));

time(time_passes,
"assert dep graph",
|| rustc_incremental::assert_dep_graph(tcx));
move || trans::trans_crate(tcx, analysis, incremental_hashes_map, output_filenames));

time(time_passes,
"serialize dep graph",
|| rustc_incremental::save_dep_graph(tcx,
&incremental_hashes_map,
&translation.metadata.hashes,
translation.link.crate_hash));
translation
}

/// Run LLVM itself, producing a bitcode file, assembly file or object file
/// as a side effect.
pub fn phase_5_run_llvm_passes(sess: &Session,
trans: &trans::CrateTranslation,
outputs: &OutputFilenames) -> CompileResult {
if sess.opts.cg.no_integrated_as ||
(sess.target.target.options.no_integrated_as &&
(outputs.outputs.contains_key(&OutputType::Object) ||
outputs.outputs.contains_key(&OutputType::Exe)))
{
let output_types = OutputTypes::new(&[(OutputType::Assembly, None)]);
time(sess.time_passes(),
"LLVM passes",
|| write::run_passes(sess, trans, &output_types, outputs));

write::run_assembler(sess, outputs);

// HACK the linker expects the object file to be named foo.0.o but
// `run_assembler` produces an object named just foo.o. Rename it if we
// are going to build an executable
if sess.opts.output_types.contains_key(&OutputType::Exe) {
let f = outputs.path(OutputType::Object);
rename_or_copy_remove(&f,
f.with_file_name(format!("{}.0.o",
f.file_stem().unwrap().to_string_lossy()))).unwrap();
}
trans: write::OngoingCrateTranslation)
-> (CompileResult, trans::CrateTranslation) {
let trans = trans.join(sess);

// Remove assembly source, unless --save-temps was specified
if !sess.opts.cg.save_temps {
fs::remove_file(&outputs.temp_path(OutputType::Assembly, None)).unwrap();
}
} else {
time(sess.time_passes(),
"LLVM passes",
|| write::run_passes(sess, trans, &sess.opts.output_types, outputs));
if sess.opts.debugging_opts.incremental_info {
write::dump_incremental_data(&trans);
}

time(sess.time_passes(),
"serialize work products",
move || rustc_incremental::save_work_products(sess));

sess.compile_status()
(sess.compile_status(), trans)
}

/// Run the linker on any artifacts that resulted from the LLVM run.
Expand Down
4 changes: 2 additions & 2 deletions src/librustc_incremental/persist/save.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use super::file_format;
use super::work_product;

pub fn save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
incremental_hashes_map: &IncrementalHashesMap,
incremental_hashes_map: IncrementalHashesMap,
metadata_hashes: &EncodedMetadataHashes,
svh: Svh) {
debug!("save_dep_graph()");
Expand All @@ -51,7 +51,7 @@ pub fn save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
eprintln!("incremental: {} edges in dep-graph", query.graph.len_edges());
}

let mut hcx = HashContext::new(tcx, incremental_hashes_map);
let mut hcx = HashContext::new(tcx, &incremental_hashes_map);
let preds = Predecessors::new(&query, &mut hcx);
let mut current_metadata_hashes = FxHashMap();

Expand Down
5 changes: 3 additions & 2 deletions src/librustc_metadata/cstore_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ use schema;
use rustc::ty::maps::QueryConfig;
use rustc::middle::cstore::{CrateStore, CrateSource, LibSource, DepKind,
NativeLibrary, MetadataLoader, LinkMeta,
LinkagePreference, LoadedMacro, EncodedMetadata};
LinkagePreference, LoadedMacro, EncodedMetadata,
EncodedMetadataHashes};
use rustc::hir::def;
use rustc::middle::lang_items;
use rustc::session::Session;
Expand Down Expand Up @@ -443,7 +444,7 @@ impl CrateStore for cstore::CStore {
tcx: TyCtxt<'a, 'tcx, 'tcx>,
link_meta: &LinkMeta,
reachable: &NodeSet)
-> EncodedMetadata
-> (EncodedMetadata, EncodedMetadataHashes)
{
encoder::encode_metadata(tcx, link_meta, reachable)
}
Expand Down
7 changes: 2 additions & 5 deletions src/librustc_metadata/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1638,7 +1638,7 @@ impl<'a, 'tcx, 'v> ItemLikeVisitor<'v> for ImplVisitor<'a, 'tcx> {
pub fn encode_metadata<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
link_meta: &LinkMeta,
exported_symbols: &NodeSet)
-> EncodedMetadata
-> (EncodedMetadata, EncodedMetadataHashes)
{
let mut cursor = Cursor::new(vec![]);
cursor.write_all(METADATA_HEADER).unwrap();
Expand Down Expand Up @@ -1681,10 +1681,7 @@ pub fn encode_metadata<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
result[header + 2] = (pos >> 8) as u8;
result[header + 3] = (pos >> 0) as u8;

EncodedMetadata {
raw_data: result,
hashes: metadata_hashes,
}
(EncodedMetadata { raw_data: result }, metadata_hashes)
}

pub fn get_repr_options<'a, 'tcx, 'gcx>(tcx: &TyCtxt<'a, 'tcx, 'gcx>, did: DefId) -> ReprOptions {
Expand Down
2 changes: 1 addition & 1 deletion src/librustc_trans/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ crate-type = ["dylib"]
test = false

[dependencies]
crossbeam = "0.2"
num_cpus = "1.0"
flate2 = "0.2"
jobserver = "0.1.5"
log = "0.3"
Expand Down
Loading

0 comments on commit e772c28

Please sign in to comment.