Skip to content

Commit e983b4f

Browse files
committed
rustc: Implement incremental "fat" LTO
Currently the compiler will produce an error if both incremental compilation and full fat LTO are requested. With recent changes and the advent of incremental ThinLTO, however, all the hard work is already done for us and it's actually not too bad to remove this error! This commit updates the codegen backend to allow incremental full fat LTO. The semantics are that the input modules to LTO are all produced incrementally, but the final LTO step is always done unconditionally regardless of whether the inputs changed or not. The only real incremental win we could have here is if zero of the input modules changed, but that's so rare it's unlikely to be worthwhile to implement such a code path. cc #57968 cc rust-lang/cargo#6643
1 parent a54b5c7 commit e983b4f

File tree

8 files changed

+246
-83
lines changed

8 files changed

+246
-83
lines changed

src/librustc/session/mod.rs

+1-15
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::lint;
99
use crate::lint::builtin::BuiltinLintDiagnostics;
1010
use crate::middle::allocator::AllocatorKind;
1111
use crate::middle::dependency_format;
12-
use crate::session::config::{OutputType, Lto};
12+
use crate::session::config::OutputType;
1313
use crate::session::search_paths::{PathKind, SearchPath};
1414
use crate::util::nodemap::{FxHashMap, FxHashSet};
1515
use crate::util::common::{duration_to_secs_str, ErrorReported};
@@ -1246,20 +1246,6 @@ pub fn build_session_(
12461246
// If it is useful to have a Session available already for validating a
12471247
// commandline argument, you can do so here.
12481248
fn validate_commandline_args_with_session_available(sess: &Session) {
1249-
1250-
if sess.opts.incremental.is_some() {
1251-
match sess.lto() {
1252-
Lto::Thin |
1253-
Lto::Fat => {
1254-
sess.err("can't perform LTO when compiling incrementally");
1255-
}
1256-
Lto::ThinLocal |
1257-
Lto::No => {
1258-
// This is fine
1259-
}
1260-
}
1261-
}
1262-
12631249
// Since we don't know if code in an rlib will be linked to statically or
12641250
// dynamically downstream, rustc generates `__imp_` symbols that help the
12651251
// MSVC linker deal with this lack of knowledge (#27438). Unfortunately,

src/librustc_codegen_llvm/back/lto.rs

+101-38
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION};
22
use rustc_codegen_ssa::back::symbol_export;
3-
use rustc_codegen_ssa::back::write::{ModuleConfig, CodegenContext, pre_lto_bitcode_filename};
3+
use rustc_codegen_ssa::back::write::{ModuleConfig, CodegenContext, FatLTOInput};
44
use rustc_codegen_ssa::back::lto::{SerializedModule, LtoModuleCodegen, ThinShared, ThinModule};
55
use rustc_codegen_ssa::traits::*;
66
use back::write::{self, DiagnosticHandlers, with_llvm_pmb, save_temp_bitcode, to_llvm_opt_settings};
@@ -21,7 +21,6 @@ use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
2121
use libc;
2222

2323
use std::ffi::{CStr, CString};
24-
use std::fs;
2524
use std::ptr;
2625
use std::slice;
2726
use std::sync::Arc;
@@ -133,7 +132,8 @@ fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
133132
/// Performs fat LTO by merging all modules into a single one and returning it
134133
/// for further optimization.
135134
pub(crate) fn run_fat(cgcx: &CodegenContext<LlvmCodegenBackend>,
136-
modules: Vec<ModuleCodegen<ModuleLlvm>>,
135+
modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
136+
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
137137
timeline: &mut Timeline)
138138
-> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
139139
{
@@ -142,7 +142,15 @@ pub(crate) fn run_fat(cgcx: &CodegenContext<LlvmCodegenBackend>,
142142
let symbol_white_list = symbol_white_list.iter()
143143
.map(|c| c.as_ptr())
144144
.collect::<Vec<_>>();
145-
fat_lto(cgcx, &diag_handler, modules, upstream_modules, &symbol_white_list, timeline)
145+
fat_lto(
146+
cgcx,
147+
&diag_handler,
148+
modules,
149+
cached_modules,
150+
upstream_modules,
151+
&symbol_white_list,
152+
timeline,
153+
)
146154
}
147155

148156
/// Performs thin LTO by performing necessary global analysis and returning two
@@ -173,33 +181,17 @@ pub(crate) fn run_thin(cgcx: &CodegenContext<LlvmCodegenBackend>,
173181
}
174182

175183
pub(crate) fn prepare_thin(
176-
cgcx: &CodegenContext<LlvmCodegenBackend>,
177184
module: ModuleCodegen<ModuleLlvm>
178185
) -> (String, ThinBuffer) {
179186
let name = module.name.clone();
180187
let buffer = ThinBuffer::new(module.module_llvm.llmod());
181-
182-
// We emit the module after having serialized it into a ThinBuffer
183-
// because only then it will contain the ThinLTO module summary.
184-
if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
185-
if cgcx.config(module.kind).emit_pre_thin_lto_bc {
186-
let path = incr_comp_session_dir
187-
.join(pre_lto_bitcode_filename(&name));
188-
189-
fs::write(&path, buffer.data()).unwrap_or_else(|e| {
190-
panic!("Error writing pre-lto-bitcode file `{}`: {}",
191-
path.display(),
192-
e);
193-
});
194-
}
195-
}
196-
197188
(name, buffer)
198189
}
199190

200191
fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
201192
diag_handler: &Handler,
202-
mut modules: Vec<ModuleCodegen<ModuleLlvm>>,
193+
mut modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
194+
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
203195
mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
204196
symbol_white_list: &[*const libc::c_char],
205197
timeline: &mut Timeline)
@@ -216,18 +208,53 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
216208
// file copy operations in the backend work correctly. The only other kind
217209
// of module here should be an allocator one, and if your crate is smaller
218210
// than the allocator module then the size doesn't really matter anyway.
219-
let (_, costliest_module) = modules.iter()
211+
let costliest_module = modules.iter()
220212
.enumerate()
213+
.filter_map(|(i, module)| {
214+
match module {
215+
FatLTOInput::InMemory(m) => Some((i, m)),
216+
FatLTOInput::Serialized { .. } => None,
217+
}
218+
})
221219
.filter(|&(_, module)| module.kind == ModuleKind::Regular)
222220
.map(|(i, module)| {
223221
let cost = unsafe {
224222
llvm::LLVMRustModuleCost(module.module_llvm.llmod())
225223
};
226224
(cost, i)
227225
})
228-
.max()
229-
.expect("must be codegen'ing at least one module");
230-
let module = modules.remove(costliest_module);
226+
.max();
227+
228+
// If we found a costliest module, we're good to go. Otherwise all our
229+
// inputs were serialized which could happen in the case, for example, that
230+
// all our inputs were incrementally reread from the cache and we're just
231+
// re-executing the LTO passes. If that's the case deserialize the first
232+
// module and create a linker with it.
233+
let module: ModuleCodegen<ModuleLlvm> = match costliest_module {
234+
Some((_cost, i)) => {
235+
match modules.remove(i) {
236+
FatLTOInput::InMemory(m) => m,
237+
FatLTOInput::Serialized { .. } => unreachable!(),
238+
}
239+
}
240+
None => {
241+
let pos = modules.iter().position(|m| {
242+
match m {
243+
FatLTOInput::InMemory(_) => false,
244+
FatLTOInput::Serialized { .. } => true,
245+
}
246+
}).expect("must have at least one serialized module");
247+
let (name, buffer) = match modules.remove(pos) {
248+
FatLTOInput::Serialized { name, buffer } => (name, buffer),
249+
FatLTOInput::InMemory(_) => unreachable!(),
250+
};
251+
ModuleCodegen {
252+
module_llvm: ModuleLlvm::parse(cgcx, &name, &buffer, diag_handler)?,
253+
name,
254+
kind: ModuleKind::Regular,
255+
}
256+
}
257+
};
231258
let mut serialized_bitcode = Vec::new();
232259
{
233260
let (llcx, llmod) = {
@@ -247,10 +274,20 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
247274
// way we know of to do that is to serialize them to a string and then parse
248275
// them later. Not great but hey, that's why it's "fat" LTO, right?
249276
serialized_modules.extend(modules.into_iter().map(|module| {
250-
let buffer = ModuleBuffer::new(module.module_llvm.llmod());
251-
let llmod_id = CString::new(&module.name[..]).unwrap();
252-
253-
(SerializedModule::Local(buffer), llmod_id)
277+
match module {
278+
FatLTOInput::InMemory(module) => {
279+
let buffer = ModuleBuffer::new(module.module_llvm.llmod());
280+
let llmod_id = CString::new(&module.name[..]).unwrap();
281+
(SerializedModule::Local(buffer), llmod_id)
282+
}
283+
FatLTOInput::Serialized { name, buffer } => {
284+
let llmod_id = CString::new(name).unwrap();
285+
(SerializedModule::Local(buffer), llmod_id)
286+
}
287+
}
288+
}));
289+
serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| {
290+
(buffer, CString::new(wp.cgu_name.clone()).unwrap())
254291
}));
255292

256293
// For all serialized bitcode files we parse them and link them in as we did
@@ -579,6 +616,16 @@ impl ModuleBuffer {
579616
llvm::LLVMRustModuleBufferCreate(m)
580617
})
581618
}
619+
620+
pub fn parse<'a>(
621+
&self,
622+
name: &str,
623+
cx: &'a llvm::Context,
624+
handler: &Handler,
625+
) -> Result<&'a llvm::Module, FatalError> {
626+
let name = CString::new(name).unwrap();
627+
parse_module(cx, &name, self.data(), handler)
628+
}
582629
}
583630

584631
impl ModuleBufferMethods for ModuleBuffer {
@@ -658,15 +705,12 @@ pub unsafe fn optimize_thin_module(
658705
// crates but for locally codegened modules we may be able to reuse
659706
// that LLVM Context and Module.
660707
let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
661-
let llmod_raw = llvm::LLVMRustParseBitcodeForThinLTO(
708+
let llmod_raw = parse_module(
662709
llcx,
663-
thin_module.data().as_ptr(),
664-
thin_module.data().len(),
665-
thin_module.shared.module_names[thin_module.idx].as_ptr(),
666-
).ok_or_else(|| {
667-
let msg = "failed to parse bitcode for thin LTO module";
668-
write::llvm_err(&diag_handler, msg)
669-
})? as *const _;
710+
&thin_module.shared.module_names[thin_module.idx],
711+
thin_module.data(),
712+
&diag_handler,
713+
)? as *const _;
670714
let module = ModuleCodegen {
671715
module_llvm: ModuleLlvm {
672716
llmod_raw,
@@ -823,3 +867,22 @@ fn module_name_to_str(c_str: &CStr) -> &str {
823867
c_str.to_str().unwrap_or_else(|e|
824868
bug!("Encountered non-utf8 LLVM module name `{}`: {}", c_str.to_string_lossy(), e))
825869
}
870+
871+
fn parse_module<'a>(
872+
cx: &'a llvm::Context,
873+
name: &CStr,
874+
data: &[u8],
875+
diag_handler: &Handler,
876+
) -> Result<&'a llvm::Module, FatalError> {
877+
unsafe {
878+
llvm::LLVMRustParseBitcodeForLTO(
879+
cx,
880+
data.as_ptr(),
881+
data.len(),
882+
name.as_ptr(),
883+
).ok_or_else(|| {
884+
let msg = "failed to parse bitcode for LTO module";
885+
write::llvm_err(&diag_handler, msg)
886+
})
887+
}
888+
}

src/librustc_codegen_llvm/lib.rs

+35-5
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ extern crate tempfile;
5454
extern crate memmap;
5555

5656
use rustc_codegen_ssa::traits::*;
57-
use rustc_codegen_ssa::back::write::{CodegenContext, ModuleConfig};
57+
use rustc_codegen_ssa::back::write::{CodegenContext, ModuleConfig, FatLTOInput};
5858
use rustc_codegen_ssa::back::lto::{SerializedModule, LtoModuleCodegen, ThinModule};
5959
use rustc_codegen_ssa::CompiledModule;
6060
use errors::{FatalError, Handler};
@@ -165,10 +165,11 @@ impl WriteBackendMethods for LlvmCodegenBackend {
165165
}
166166
fn run_fat_lto(
167167
cgcx: &CodegenContext<Self>,
168-
modules: Vec<ModuleCodegen<Self::Module>>,
168+
modules: Vec<FatLTOInput<Self>>,
169+
cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
169170
timeline: &mut Timeline
170171
) -> Result<LtoModuleCodegen<Self>, FatalError> {
171-
back::lto::run_fat(cgcx, modules, timeline)
172+
back::lto::run_fat(cgcx, modules, cached_modules, timeline)
172173
}
173174
fn run_thin_lto(
174175
cgcx: &CodegenContext<Self>,
@@ -204,10 +205,14 @@ impl WriteBackendMethods for LlvmCodegenBackend {
204205
back::write::codegen(cgcx, diag_handler, module, config, timeline)
205206
}
206207
fn prepare_thin(
207-
cgcx: &CodegenContext<Self>,
208208
module: ModuleCodegen<Self::Module>
209209
) -> (String, Self::ThinBuffer) {
210-
back::lto::prepare_thin(cgcx, module)
210+
back::lto::prepare_thin(module)
211+
}
212+
fn serialize_module(
213+
module: ModuleCodegen<Self::Module>
214+
) -> (String, Self::ModuleBuffer) {
215+
(module.name, back::lto::ModuleBuffer::new(module.module_llvm.llmod()))
211216
}
212217
fn run_lto_pass_manager(
213218
cgcx: &CodegenContext<Self>,
@@ -375,6 +380,31 @@ impl ModuleLlvm {
375380
}
376381
}
377382

383+
fn parse(
384+
cgcx: &CodegenContext<LlvmCodegenBackend>,
385+
name: &str,
386+
buffer: &back::lto::ModuleBuffer,
387+
handler: &Handler,
388+
) -> Result<Self, FatalError> {
389+
unsafe {
390+
let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
391+
let llmod_raw = buffer.parse(name, llcx, handler)?;
392+
let tm = match (cgcx.tm_factory.0)() {
393+
Ok(m) => m,
394+
Err(e) => {
395+
handler.struct_err(&e).emit();
396+
return Err(FatalError)
397+
}
398+
};
399+
400+
Ok(ModuleLlvm {
401+
llmod_raw,
402+
llcx,
403+
tm,
404+
})
405+
}
406+
}
407+
378408
fn llmod(&self) -> &llvm::Module {
379409
unsafe {
380410
&*self.llmod_raw

src/librustc_codegen_llvm/llvm/ffi.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1804,7 +1804,7 @@ extern "C" {
18041804
CallbackPayload: *mut c_void,
18051805
);
18061806
pub fn LLVMRustFreeThinLTOData(Data: &'static mut ThinLTOData);
1807-
pub fn LLVMRustParseBitcodeForThinLTO(
1807+
pub fn LLVMRustParseBitcodeForLTO(
18081808
Context: &Context,
18091809
Data: *const u8,
18101810
len: usize,

0 commit comments

Comments
 (0)