Skip to content

Commit cfe07cd

Browse files
committed
Use llvm::computeLTOCacheKey to determine post-ThinLTO CGU reuse
During incremental ThinLTO compilation, we attempt to re-use the optimized (post-ThinLTO) bitcode file for a module if it is 'safe' to do so. Up until now, 'safe' has meant that the set of modules that our current modules imports from/exports to is unchanged from the previous compilation session. See PR #67020 and PR #71131 for more details. However, this turns out be insufficient to guarantee that it's safe to reuse the post-LTO module (i.e. that optimizing the pre-LTO module would produce the same result). When LLVM optimizes a module during ThinLTO, it may look at other information from the 'module index', such as whether a (non-imported!) global variable is used. If this information changes between compilation runs, we may end up re-using an optimized module that (for example) had dead-code elimination run on a function that is now used by another module. Fortunately, LLVM implements its own ThinLTO module cache, which is used when ThinLTO is performed by a linker plugin (e.g. when clang is used to compile a C proect). Using this cache directly would require extensive refactoring of our code - but fortunately for us, LLVM provides a function that does exactly what we need. The function `llvm::computeLTOCacheKey` is used to compute a SHA-1 hash from all data that might influence the result of ThinLTO on a module. In addition to the module imports/exports that we manually track, it also hashes information about global variables (e.g. their liveness) which might be used during optimization. By using this function, we shouldn't have to worry about new LLVM passes breaking our module re-use behavior. In LLVM, the output of this function forms part of the filename used to store the post-ThinLTO module. To keep our current filename structure intact, this PR just writes out the mapping 'CGU name -> Hash' to a file. To determine if a post-LTO module should be reused, we compare hashes from the previous session. This should unblock PR #75199 - by sheer chance, it seems to have hit this issue due to the particular CGU partitioning and optimization decisions that end up getting made.
1 parent 7bdb5de commit cfe07cd

File tree

6 files changed

+164
-197
lines changed

6 files changed

+164
-197
lines changed

compiler/rustc_codegen_llvm/src/back/lto.rs

+69-192
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@ use crate::back::write::{
22
self, save_temp_bitcode, to_llvm_opt_settings, with_llvm_pmb, DiagnosticHandlers,
33
};
44
use crate::llvm::archive_ro::ArchiveRO;
5-
use crate::llvm::{self, False, True};
5+
use crate::llvm::{self, build_string, False, True};
66
use crate::{LlvmCodegenBackend, ModuleLlvm};
77
use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
88
use rustc_codegen_ssa::back::symbol_export;
99
use rustc_codegen_ssa::back::write::{CodegenContext, FatLTOInput, ModuleConfig};
1010
use rustc_codegen_ssa::traits::*;
1111
use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
12-
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
12+
use rustc_data_structures::fx::FxHashMap;
1313
use rustc_errors::{FatalError, Handler};
1414
use rustc_hir::def_id::LOCAL_CRATE;
1515
use rustc_middle::bug;
@@ -22,16 +22,14 @@ use tracing::{debug, info};
2222
use std::ffi::{CStr, CString};
2323
use std::fs::File;
2424
use std::io;
25-
use std::mem;
2625
use std::path::Path;
2726
use std::ptr;
2827
use std::slice;
2928
use std::sync::Arc;
3029

31-
/// We keep track of past LTO imports that were used to produce the current set
32-
/// of compiled object files that we might choose to reuse during this
33-
/// compilation session.
34-
pub const THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-imports.bin";
30+
/// We keep track of the computed LTO cache keys from the previous
31+
/// session to determine which CGUs we can reuse.
32+
pub const THIN_LTO_KEYS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-keys.bin";
3533

3634
pub fn crate_type_allows_lto(crate_type: CrateType) -> bool {
3735
match crate_type {
@@ -485,31 +483,31 @@ fn thin_lto(
485483
)
486484
.ok_or_else(|| write::llvm_err(&diag_handler, "failed to prepare thin LTO context"))?;
487485

488-
info!("thin LTO data created");
486+
let data = ThinData(data);
489487

490-
let (import_map_path, prev_import_map, curr_import_map) =
491-
if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
492-
let path = incr_comp_session_dir.join(THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME);
493-
// If previous imports have been deleted, or we get an IO error
494-
// reading the file storing them, then we'll just use `None` as the
495-
// prev_import_map, which will force the code to be recompiled.
496-
let prev = if path.exists() {
497-
ThinLTOImportMaps::load_from_file(&path).ok()
498-
} else {
499-
None
500-
};
501-
let curr = ThinLTOImportMaps::from_thin_lto_data(data);
502-
(Some(path), prev, curr)
503-
} else {
504-
// If we don't compile incrementally, we don't need to load the
505-
// import data from LLVM.
506-
assert!(green_modules.is_empty());
507-
let curr = ThinLTOImportMaps::default();
508-
(None, None, curr)
509-
};
510-
info!("thin LTO import map loaded");
488+
info!("thin LTO data created");
511489

512-
let data = ThinData(data);
490+
let (key_map_path, prev_key_map, curr_key_map) = if let Some(ref incr_comp_session_dir) =
491+
cgcx.incr_comp_session_dir
492+
{
493+
let path = incr_comp_session_dir.join(THIN_LTO_KEYS_INCR_COMP_FILE_NAME);
494+
// If the previous file was deleted, or we get an IO error
495+
// reading the file, then we'll just use `None` as the
496+
// prev_key_map, which will force the code to be recompiled.
497+
let prev =
498+
if path.exists() { ThinLTOKeysMap::load_from_file(&path).ok() } else { None };
499+
let curr = ThinLTOKeysMap::from_thin_lto_modules(&data, &thin_modules, &module_names);
500+
(Some(path), prev, curr)
501+
} else {
502+
// If we don't compile incrementally, we don't need to load the
503+
// import data from LLVM.
504+
assert!(green_modules.is_empty());
505+
let curr = ThinLTOKeysMap::default();
506+
(None, None, curr)
507+
};
508+
info!("thin LTO cache key map loaded");
509+
info!("prev_key_map: {:#?}", prev_key_map);
510+
info!("curr_key_map: {:#?}", curr_key_map);
513511

514512
// Throw our data in an `Arc` as we'll be sharing it across threads. We
515513
// also put all memory referenced by the C++ data (buffers, ids, etc)
@@ -528,60 +526,14 @@ fn thin_lto(
528526
info!("checking which modules can be-reused and which have to be re-optimized.");
529527
for (module_index, module_name) in shared.module_names.iter().enumerate() {
530528
let module_name = module_name_to_str(module_name);
531-
532-
// If (1.) the module hasn't changed, and (2.) none of the modules
533-
// it imports from have changed, *and* (3.) the import and export
534-
// sets themselves have not changed from the previous compile when
535-
// it was last ThinLTO'ed, then we can re-use the post-ThinLTO
536-
// version of the module. Otherwise, freshly perform LTO
537-
// optimization.
538-
//
539-
// (Note that globally, the export set is just the inverse of the
540-
// import set.)
541-
//
542-
// For further justification of why the above is necessary and sufficient,
543-
// see the LLVM blog post on ThinLTO:
544-
//
545-
// http://blog.llvm.org/2016/06/thinlto-scalable-and-incremental-lto.html
546-
//
547-
// which states the following:
548-
//
549-
// ```quote
550-
// any particular ThinLTO backend must be redone iff:
551-
//
552-
// 1. The corresponding (primary) module’s bitcode changed
553-
// 2. The list of imports into or exports from the module changed
554-
// 3. The bitcode for any module being imported from has changed
555-
// 4. Any global analysis result affecting either the primary module
556-
// or anything it imports has changed.
557-
// ```
558-
//
559-
// This strategy means we can always save the computed imports as
560-
// canon: when we reuse the post-ThinLTO version, condition (3.)
561-
// ensures that the current import set is the same as the previous
562-
// one. (And of course, when we don't reuse the post-ThinLTO
563-
// version, the current import set *is* the correct one, since we
564-
// are doing the ThinLTO in this current compilation cycle.)
565-
//
566-
// For more discussion, see rust-lang/rust#59535 (where the import
567-
// issue was discovered) and rust-lang/rust#69798 (where the
568-
// analogous export issue was discovered).
569-
if let (Some(prev_import_map), true) =
570-
(prev_import_map.as_ref(), green_modules.contains_key(module_name))
529+
if let (Some(prev_key_map), true) =
530+
(prev_key_map.as_ref(), green_modules.contains_key(module_name))
571531
{
572532
assert!(cgcx.incr_comp_session_dir.is_some());
573533

574-
let prev_imports = prev_import_map.imports_of(module_name);
575-
let curr_imports = curr_import_map.imports_of(module_name);
576-
let prev_exports = prev_import_map.exports_of(module_name);
577-
let curr_exports = curr_import_map.exports_of(module_name);
578-
let imports_all_green = curr_imports
579-
.iter()
580-
.all(|imported_module| green_modules.contains_key(imported_module));
581-
if imports_all_green
582-
&& equivalent_as_sets(prev_imports, curr_imports)
583-
&& equivalent_as_sets(prev_exports, curr_exports)
584-
{
534+
// If a module exists in both the current and the previous session,
535+
// and has the same LTO cache key in both sessions, then we can re-use it
536+
if prev_key_map.keys.get(module_name) == curr_key_map.keys.get(module_name) {
585537
let work_product = green_modules[module_name].clone();
586538
copy_jobs.push(work_product);
587539
info!(" - {}: re-used", module_name);
@@ -599,10 +551,10 @@ fn thin_lto(
599551
}
600552

601553
// Save the current ThinLTO import information for the next compilation
602-
// session, overwriting the previous serialized imports (if any).
603-
if let Some(path) = import_map_path {
604-
if let Err(err) = curr_import_map.save_to_file(&path) {
605-
let msg = format!("Error while writing ThinLTO import data: {}", err);
554+
// session, overwriting the previous serialized data (if any).
555+
if let Some(path) = key_map_path {
556+
if let Err(err) = curr_key_map.save_to_file(&path) {
557+
let msg = format!("Error while writing ThinLTO key data: {}", err);
606558
return Err(write::llvm_err(&diag_handler, &msg));
607559
}
608560
}
@@ -611,24 +563,6 @@ fn thin_lto(
611563
}
612564
}
613565

614-
/// Given two slices, each with no repeat elements. returns true if and only if
615-
/// the two slices have the same contents when considered as sets (i.e. when
616-
/// element order is disregarded).
617-
fn equivalent_as_sets(a: &[String], b: &[String]) -> bool {
618-
// cheap path: unequal lengths means cannot possibly be set equivalent.
619-
if a.len() != b.len() {
620-
return false;
621-
}
622-
// fast path: before building new things, check if inputs are equivalent as is.
623-
if a == b {
624-
return true;
625-
}
626-
// slow path: general set comparison.
627-
let a: FxHashSet<&str> = a.iter().map(|s| s.as_str()).collect();
628-
let b: FxHashSet<&str> = b.iter().map(|s| s.as_str()).collect();
629-
a == b
630-
}
631-
632566
pub(crate) fn run_pass_manager(
633567
cgcx: &CodegenContext<LlvmCodegenBackend>,
634568
module: &ModuleCodegen<ModuleLlvm>,
@@ -942,113 +876,56 @@ pub unsafe fn optimize_thin_module(
942876
Ok(module)
943877
}
944878

945-
/// Summarizes module import/export relationships used by LLVM's ThinLTO pass.
946-
///
947-
/// Note that we tend to have two such instances of `ThinLTOImportMaps` in use:
948-
/// one loaded from a file that represents the relationships used during the
949-
/// compilation associated with the incremetnal build artifacts we are
950-
/// attempting to reuse, and another constructed via `from_thin_lto_data`, which
951-
/// captures the relationships of ThinLTO in the current compilation.
879+
/// Maps LLVM module identifiers to their corresponding LLVM LTO cache keys
952880
#[derive(Debug, Default)]
953-
pub struct ThinLTOImportMaps {
954-
// key = llvm name of importing module, value = list of modules it imports from
955-
imports: FxHashMap<String, Vec<String>>,
956-
// key = llvm name of exporting module, value = list of modules it exports to
957-
exports: FxHashMap<String, Vec<String>>,
881+
pub struct ThinLTOKeysMap {
882+
// key = llvm name of importing module, value = LLVM cache key
883+
keys: FxHashMap<String, String>,
958884
}
959885

960-
impl ThinLTOImportMaps {
961-
/// Returns modules imported by `llvm_module_name` during some ThinLTO pass.
962-
fn imports_of(&self, llvm_module_name: &str) -> &[String] {
963-
self.imports.get(llvm_module_name).map(|v| &v[..]).unwrap_or(&[])
964-
}
965-
966-
/// Returns modules exported by `llvm_module_name` during some ThinLTO pass.
967-
fn exports_of(&self, llvm_module_name: &str) -> &[String] {
968-
self.exports.get(llvm_module_name).map(|v| &v[..]).unwrap_or(&[])
969-
}
970-
886+
impl ThinLTOKeysMap {
971887
fn save_to_file(&self, path: &Path) -> io::Result<()> {
972888
use std::io::Write;
973889
let file = File::create(path)?;
974890
let mut writer = io::BufWriter::new(file);
975-
for (importing_module_name, imported_modules) in &self.imports {
976-
writeln!(writer, "{}", importing_module_name)?;
977-
for imported_module in imported_modules {
978-
writeln!(writer, " {}", imported_module)?;
979-
}
980-
writeln!(writer)?;
891+
for (module, key) in &self.keys {
892+
writeln!(writer, "{} {}", module, key)?;
981893
}
982894
Ok(())
983895
}
984896

985-
fn load_from_file(path: &Path) -> io::Result<ThinLTOImportMaps> {
897+
fn load_from_file(path: &Path) -> io::Result<Self> {
986898
use std::io::BufRead;
987-
let mut imports = FxHashMap::default();
988-
let mut exports: FxHashMap<_, Vec<_>> = FxHashMap::default();
989-
let mut current_module: Option<String> = None;
990-
let mut current_imports: Vec<String> = vec![];
899+
let mut keys = FxHashMap::default();
991900
let file = File::open(path)?;
992901
for line in io::BufReader::new(file).lines() {
993902
let line = line?;
994-
if line.is_empty() {
995-
let importing_module = current_module.take().expect("Importing module not set");
996-
for imported in &current_imports {
997-
exports.entry(imported.clone()).or_default().push(importing_module.clone());
998-
}
999-
imports.insert(importing_module, mem::replace(&mut current_imports, vec![]));
1000-
} else if line.starts_with(' ') {
1001-
// Space marks an imported module
1002-
assert_ne!(current_module, None);
1003-
current_imports.push(line.trim().to_string());
1004-
} else {
1005-
// Otherwise, beginning of a new module (must be start or follow empty line)
1006-
assert_eq!(current_module, None);
1007-
current_module = Some(line.trim().to_string());
1008-
}
903+
let mut split = line.split(" ");
904+
let module = split.next().unwrap();
905+
let key = split.next().unwrap();
906+
assert_eq!(split.next(), None, "Expected two space-separated values, found {:?}", line);
907+
keys.insert(module.to_string(), key.to_string());
1009908
}
1010-
Ok(ThinLTOImportMaps { imports, exports })
909+
Ok(Self { keys })
1011910
}
1012911

1013-
/// Loads the ThinLTO import map from ThinLTOData.
1014-
unsafe fn from_thin_lto_data(data: *const llvm::ThinLTOData) -> ThinLTOImportMaps {
1015-
unsafe extern "C" fn imported_module_callback(
1016-
payload: *mut libc::c_void,
1017-
importing_module_name: *const libc::c_char,
1018-
imported_module_name: *const libc::c_char,
1019-
) {
1020-
let map = &mut *(payload as *mut ThinLTOImportMaps);
1021-
let importing_module_name = CStr::from_ptr(importing_module_name);
1022-
let importing_module_name = module_name_to_str(&importing_module_name);
1023-
let imported_module_name = CStr::from_ptr(imported_module_name);
1024-
let imported_module_name = module_name_to_str(&imported_module_name);
1025-
1026-
if !map.imports.contains_key(importing_module_name) {
1027-
map.imports.insert(importing_module_name.to_owned(), vec![]);
1028-
}
1029-
1030-
map.imports
1031-
.get_mut(importing_module_name)
1032-
.unwrap()
1033-
.push(imported_module_name.to_owned());
1034-
1035-
if !map.exports.contains_key(imported_module_name) {
1036-
map.exports.insert(imported_module_name.to_owned(), vec![]);
1037-
}
1038-
1039-
map.exports
1040-
.get_mut(imported_module_name)
1041-
.unwrap()
1042-
.push(importing_module_name.to_owned());
1043-
}
1044-
1045-
let mut map = ThinLTOImportMaps::default();
1046-
llvm::LLVMRustGetThinLTOModuleImports(
1047-
data,
1048-
imported_module_callback,
1049-
&mut map as *mut _ as *mut libc::c_void,
1050-
);
1051-
map
912+
fn from_thin_lto_modules(
913+
data: &ThinData,
914+
modules: &[llvm::ThinLTOModule],
915+
names: &[CString],
916+
) -> Self {
917+
let keys = modules
918+
.iter()
919+
.zip(names.iter())
920+
.map(|(module, name)| {
921+
let key = build_string(|rust_str| unsafe {
922+
llvm::LLVMRustComputeLTOCacheKey(rust_str, module.identifier, data.0);
923+
})
924+
.expect("Invalid ThinLTO module key");
925+
(name.clone().into_string().unwrap(), key)
926+
})
927+
.collect();
928+
Self { keys }
1052929
}
1053930
}
1054931

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

+6
Original file line numberDiff line numberDiff line change
@@ -2362,4 +2362,10 @@ extern "C" {
23622362
bytecode_len: usize,
23632363
) -> bool;
23642364
pub fn LLVMRustLinkerFree(linker: &'a mut Linker<'a>);
2365+
#[allow(improper_ctypes)]
2366+
pub fn LLVMRustComputeLTOCacheKey(
2367+
key_out: &RustString,
2368+
mod_id: *const c_char,
2369+
data: &ThinLTOData,
2370+
);
23652371
}

0 commit comments

Comments
 (0)