From 807def39808e3a66c474574f9b93d6ee7bbecc49 Mon Sep 17 00:00:00 2001 From: DianQK Date: Sun, 17 Nov 2024 14:21:23 +0800 Subject: [PATCH] The embedded bitcode should always be prepared for LTO/ThinLTO --- .../rustc_codegen_cranelift/src/driver/aot.rs | 2 +- compiler/rustc_codegen_llvm/src/back/lto.rs | 12 +- compiler/rustc_codegen_llvm/src/back/write.rs | 151 ++++++++++++------ compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 1 + compiler/rustc_codegen_ssa/src/back/write.rs | 3 + compiler/rustc_session/src/config.rs | 11 +- .../run-make/pgo-embed-bc-lto/interesting.rs | 16 ++ tests/run-make/pgo-embed-bc-lto/main.rs | 5 + tests/run-make/pgo-embed-bc-lto/opaque.rs | 5 + tests/run-make/pgo-embed-bc-lto/rmake.rs | 57 +++++++ tests/ui/asm/inline-syntax.arm.stderr | 11 +- 11 files changed, 222 insertions(+), 52 deletions(-) create mode 100644 tests/run-make/pgo-embed-bc-lto/interesting.rs create mode 100644 tests/run-make/pgo-embed-bc-lto/main.rs create mode 100644 tests/run-make/pgo-embed-bc-lto/opaque.rs create mode 100644 tests/run-make/pgo-embed-bc-lto/rmake.rs diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs index 8eab73ad5f9f8..918044e5c3f85 100644 --- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs +++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs @@ -204,7 +204,7 @@ fn produce_final_output_artifacts( // to get rid of it. for output_type in crate_output.outputs.keys() { match *output_type { - OutputType::Bitcode | OutputType::ThinLinkBitcode => { + OutputType::Bitcode | OutputType::ThinLinkBitcode | OutputType::ThinBitcode => { // Cranelift doesn't have bitcode // user_wants_bitcode = true; // // Copy to .bc, but always keep the .0.bc. 
There is a later diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index 48beb9be2b2a1..006c45da37338 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -604,7 +604,17 @@ pub(crate) fn run_pass_manager( debug!("running the pass manager"); let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO }; let opt_level = config.opt_level.unwrap_or(config::OptLevel::No); - unsafe { write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) }?; + unsafe { + write::llvm_optimize( + cgcx, + dcx, + module.module_llvm.llmod(), + &*module.module_llvm.tm, + config, + opt_level, + opt_stage, + ) + }?; debug!("lto done"); Ok(()) } diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index a65ae4df1e378..a3dfc341830ee 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -513,7 +513,8 @@ fn get_instr_profile_output_path(config: &ModuleConfig) -> Option { pub(crate) unsafe fn llvm_optimize( cgcx: &CodegenContext, dcx: DiagCtxtHandle<'_>, - module: &ModuleCodegen, + llmod: &llvm::Module, + tm: &llvm::TargetMachine, config: &ModuleConfig, opt_level: config::OptLevel, opt_stage: llvm::OptStage, @@ -572,8 +573,8 @@ pub(crate) unsafe fn llvm_optimize( let result = unsafe { llvm::LLVMRustOptimize( - module.module_llvm.llmod(), - &*module.module_llvm.tm, + llmod, + tm, to_pass_builder_opt_level(opt_level), opt_stage, cgcx.opts.cg.linker_plugin_lto.enabled(), @@ -635,8 +636,51 @@ pub(crate) unsafe fn optimize( _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO, _ => llvm::OptStage::PreLinkNoLTO, }; - return unsafe { llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) }; + if opt_stage == llvm::OptStage::PreLinkNoLTO + && config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) + { + let 
_timer = cgcx.prof.generic_activity_with_arg( + "LLVM_module_codegen_prepare_embed_bitcode", + &*module.name, + ); + // The embedded bitcode is used to run LTO/ThinLTO. + // `OptStage::PreLinkNoLTO` is not suitable as input for LTO, + // as it may run certain passes that cannot be executed multiple times, + // such as LLVM's Call Graph Profile pass. So, we create a copy to + // run `OptStage::PreLinkThinLTO` for the subsequent LTO process. + let llmod = unsafe { llvm::LLVMCloneModule(module.module_llvm.llmod()) }; + unsafe { + llvm_optimize( + cgcx, + dcx, + llmod, + &*module.module_llvm.tm, + config, + opt_level, + llvm::OptStage::PreLinkThinLTO, + ) + }?; + let embed_thin = + ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary); + let thin_bc_out = cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name); + if let Err(err) = fs::write(&thin_bc_out, embed_thin.data()) { + dcx.emit_err(WriteBytecode { path: &thin_bc_out, err }); + } + unsafe { llvm::LLVMDisposeModule(llmod) }; + } + unsafe { + llvm_optimize( + cgcx, + dcx, + module.module_llvm.llmod(), + &*module.module_llvm.tm, + config, + opt_level, + opt_stage, + ) + }?; } + Ok(()) } @@ -716,11 +760,54 @@ pub(crate) unsafe fn codegen( // asm from LLVM and use `gcc` to create the object file. 
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name); - let bc_summary_out = - cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name); let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name); + if config.emit_ir { + let _timer = + cgcx.prof.generic_activity_with_arg("LLVM_module_codegen_emit_ir", &*module.name); + let out = cgcx.output_filenames.temp_path(OutputType::LlvmAssembly, module_name); + let out_c = path_to_c_string(&out); + + extern "C" fn demangle_callback( + input_ptr: *const c_char, + input_len: size_t, + output_ptr: *mut c_char, + output_len: size_t, + ) -> size_t { + let input = + unsafe { slice::from_raw_parts(input_ptr as *const u8, input_len as usize) }; + + let Ok(input) = str::from_utf8(input) else { return 0 }; + + let output = unsafe { + slice::from_raw_parts_mut(output_ptr as *mut u8, output_len as usize) + }; + let mut cursor = io::Cursor::new(output); + + let Ok(demangled) = rustc_demangle::try_demangle(input) else { return 0 }; + + if write!(cursor, "{demangled:#}").is_err() { + // Possible only if provided buffer is not big enough + return 0; + } + + cursor.position() as size_t + } + + let result = + unsafe { llvm::LLVMRustPrintModule(llmod, out_c.as_ptr(), demangle_callback) }; + + if result == llvm::LLVMRustResult::Success { + record_artifact_size(&cgcx.prof, "llvm_ir", &out); + } + + result.into_result().map_err(|()| llvm_err(dcx, LlvmError::WriteIr { path: &out }))?; + } + if config.bitcode_needed() { + let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name); + let bc_summary_out = + cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name); let _timer = cgcx .prof .generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name); @@ -767,54 +854,22 @@ pub(crate) unsafe fn codegen( let _timer = cgcx .prof .generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name); + let thin_bc_out = + 
cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name); + let thin_data; + let mut data = data; + if thin_bc_out.exists() { + thin_data = fs::read(&thin_bc_out).unwrap(); + debug!("removing embed bitcode file {:?}", thin_bc_out); + ensure_removed(dcx, &thin_bc_out); + data = thin_data.as_slice(); + } unsafe { embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data); } } } - if config.emit_ir { - let _timer = - cgcx.prof.generic_activity_with_arg("LLVM_module_codegen_emit_ir", &*module.name); - let out = cgcx.output_filenames.temp_path(OutputType::LlvmAssembly, module_name); - let out_c = path_to_c_string(&out); - - extern "C" fn demangle_callback( - input_ptr: *const c_char, - input_len: size_t, - output_ptr: *mut c_char, - output_len: size_t, - ) -> size_t { - let input = - unsafe { slice::from_raw_parts(input_ptr as *const u8, input_len as usize) }; - - let Ok(input) = str::from_utf8(input) else { return 0 }; - - let output = unsafe { - slice::from_raw_parts_mut(output_ptr as *mut u8, output_len as usize) - }; - let mut cursor = io::Cursor::new(output); - - let Ok(demangled) = rustc_demangle::try_demangle(input) else { return 0 }; - - if write!(cursor, "{demangled:#}").is_err() { - // Possible only if provided buffer is not big enough - return 0; - } - - cursor.position() as size_t - } - - let result = - unsafe { llvm::LLVMRustPrintModule(llmod, out_c.as_ptr(), demangle_callback) }; - - if result == llvm::LLVMRustResult::Success { - record_artifact_size(&cgcx.prof, "llvm_ir", &out); - } - - result.into_result().map_err(|()| llvm_err(dcx, LlvmError::WriteIr { path: &out }))?; - } - if config.emit_asm { let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_codegen_emit_asm", &*module.name); diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 17b0ec4b9360a..55af32241aee0 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -870,6 
+870,7 @@ unsafe extern "C" { pub fn LLVMModuleCreateWithNameInContext(ModuleID: *const c_char, C: &Context) -> &Module; pub fn LLVMGetModuleContext(M: &Module) -> &Context; pub fn LLVMCloneModule(M: &Module) -> &Module; + pub fn LLVMDisposeModule(M: &Module); /// Data layout. See Module::getDataLayout. pub fn LLVMGetDataLayoutStr(M: &Module) -> *const c_char; diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index a2285bf9204a4..b01865ae4f230 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -618,6 +618,9 @@ fn produce_final_output_artifacts( // them for making an rlib. copy_if_one_unit(OutputType::Bitcode, true); } + OutputType::ThinBitcode => { + copy_if_one_unit(OutputType::ThinBitcode, true); + } OutputType::ThinLinkBitcode => { copy_if_one_unit(OutputType::ThinLinkBitcode, false); } diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index d60c56fee756f..71eef61f3f79f 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -508,6 +508,7 @@ impl FromStr for SplitDwarfKind { pub enum OutputType { Bitcode, ThinLinkBitcode, + ThinBitcode, Assembly, LlvmAssembly, Mir, @@ -538,6 +539,7 @@ impl OutputType { OutputType::Exe | OutputType::DepInfo | OutputType::Metadata => true, OutputType::Bitcode | OutputType::ThinLinkBitcode + | OutputType::ThinBitcode | OutputType::Assembly | OutputType::LlvmAssembly | OutputType::Mir @@ -549,6 +551,7 @@ impl OutputType { match *self { OutputType::Bitcode => "llvm-bc", OutputType::ThinLinkBitcode => "thin-link-bitcode", + OutputType::ThinBitcode => "thin-llvm-bc", OutputType::Assembly => "asm", OutputType::LlvmAssembly => "llvm-ir", OutputType::Mir => "mir", @@ -566,6 +569,7 @@ impl OutputType { "mir" => OutputType::Mir, "llvm-bc" => OutputType::Bitcode, "thin-link-bitcode" => OutputType::ThinLinkBitcode, + "thin-llvm-bc" => 
OutputType::ThinBitcode, "obj" => OutputType::Object, "metadata" => OutputType::Metadata, "link" => OutputType::Exe, @@ -576,9 +580,10 @@ impl OutputType { fn shorthands_display() -> String { format!( - "`{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`", + "`{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`", OutputType::Bitcode.shorthand(), OutputType::ThinLinkBitcode.shorthand(), + OutputType::ThinBitcode.shorthand(), OutputType::Assembly.shorthand(), OutputType::LlvmAssembly.shorthand(), OutputType::Mir.shorthand(), @@ -593,6 +598,7 @@ impl OutputType { match *self { OutputType::Bitcode => "bc", OutputType::ThinLinkBitcode => "indexing.o", + OutputType::ThinBitcode => "thin.bc", OutputType::Assembly => "s", OutputType::LlvmAssembly => "ll", OutputType::Mir => "mir", @@ -611,6 +617,7 @@ impl OutputType { | OutputType::DepInfo => true, OutputType::Bitcode | OutputType::ThinLinkBitcode + | OutputType::ThinBitcode | OutputType::Object | OutputType::Metadata | OutputType::Exe => false, @@ -698,6 +705,7 @@ impl OutputTypes { self.0.keys().any(|k| match *k { OutputType::Bitcode | OutputType::ThinLinkBitcode + | OutputType::ThinBitcode | OutputType::Assembly | OutputType::LlvmAssembly | OutputType::Mir @@ -712,6 +720,7 @@ impl OutputTypes { self.0.keys().any(|k| match *k { OutputType::Bitcode | OutputType::ThinLinkBitcode + | OutputType::ThinBitcode | OutputType::Assembly | OutputType::LlvmAssembly | OutputType::Mir diff --git a/tests/run-make/pgo-embed-bc-lto/interesting.rs b/tests/run-make/pgo-embed-bc-lto/interesting.rs new file mode 100644 index 0000000000000..13105c17e126d --- /dev/null +++ b/tests/run-make/pgo-embed-bc-lto/interesting.rs @@ -0,0 +1,16 @@ +#![crate_name = "interesting"] +#![crate_type = "rlib"] + +extern crate opaque; + +#[no_mangle] +#[inline(never)] +pub fn function_called_once() { + opaque::foo(); +} + +// CHECK-LABEL: @function_called_once +// CHECK-SAME: !prof [[function_called_once_id:![0-9]+]] { +// CHECK: "CG Profile" +//
CHECK-NOT: "CG Profile" +// CHECK-DAG: [[function_called_once_id]] = !{!"function_entry_count", i64 1} diff --git a/tests/run-make/pgo-embed-bc-lto/main.rs b/tests/run-make/pgo-embed-bc-lto/main.rs new file mode 100644 index 0000000000000..ce8747bef3c2d --- /dev/null +++ b/tests/run-make/pgo-embed-bc-lto/main.rs @@ -0,0 +1,5 @@ +extern crate interesting; + +fn main() { + interesting::function_called_once(); +} diff --git a/tests/run-make/pgo-embed-bc-lto/opaque.rs b/tests/run-make/pgo-embed-bc-lto/opaque.rs new file mode 100644 index 0000000000000..b4467dc779680 --- /dev/null +++ b/tests/run-make/pgo-embed-bc-lto/opaque.rs @@ -0,0 +1,5 @@ +#![crate_name = "opaque"] +#![crate_type = "rlib"] + +#[inline(never)] +pub fn foo() {} diff --git a/tests/run-make/pgo-embed-bc-lto/rmake.rs b/tests/run-make/pgo-embed-bc-lto/rmake.rs new file mode 100644 index 0000000000000..e82d09668cd0f --- /dev/null +++ b/tests/run-make/pgo-embed-bc-lto/rmake.rs @@ -0,0 +1,57 @@ +// This test case verifies that we successfully complete an LTO build with PGO +// using the embedded bitcode. +// It also ensures that the generated IR correctly includes the call results. 
+ +//@ needs-profiler-runtime +//@ ignore-cross-compile + +use std::path::Path; + +use run_make_support::{ + has_extension, has_prefix, llvm_filecheck, llvm_profdata, rfs, run, rustc, shallow_find_files, +}; + +fn main() { + let path_prof_data_dir = Path::new("prof_data_dir"); + let path_merged_profdata = path_prof_data_dir.join("merged.profdata"); + rustc().input("opaque.rs").codegen_units(1).run(); + rfs::create_dir_all(&path_prof_data_dir); + rustc() + .input("interesting.rs") + .profile_generate(&path_prof_data_dir) + .opt() + .codegen_units(1) + .run(); + rustc() + .input("main.rs") + .arg("-Clto=thin") + .opt() + .codegen_units(1) + .profile_generate(&path_prof_data_dir) + .opt() + .run(); + run("main"); + llvm_profdata().merge().output(&path_merged_profdata).input(path_prof_data_dir).run(); + rustc() + .input("interesting.rs") + .profile_use(&path_merged_profdata) + .opt() + .codegen_units(1) + .emit("link") + .run(); + rustc() + .input("main.rs") + .arg("-Clto=thin") + .opt() + .codegen_units(1) + .profile_use(&path_merged_profdata) + .emit("llvm-ir,link") + .opt() + .run(); + let files = shallow_find_files(".", |path| { + has_prefix(path, "main.interesting.interesting") && has_extension(path, "ll") + }); + assert_eq!(files.len(), 1); + let llvm_ir = &files[0]; + llvm_filecheck().patterns("interesting.rs").stdin_buf(rfs::read(llvm_ir)).run(); +} diff --git a/tests/ui/asm/inline-syntax.arm.stderr b/tests/ui/asm/inline-syntax.arm.stderr index 61e5078d6d9b4..b77c24863d6b8 100644 --- a/tests/ui/asm/inline-syntax.arm.stderr +++ b/tests/ui/asm/inline-syntax.arm.stderr @@ -6,6 +6,15 @@ note: instantiated into assembly here LL | .intel_syntax noprefix | ^ +error: unknown directive + | +note: instantiated into assembly here + --> :1:1 + | +LL | .intel_syntax noprefix + | ^ + = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + error: unknown directive | note: instantiated into assembly here @@ -86,5 +95,5 @@ note: instantiated into assembly 
here LL | .intel_syntax noprefix | ^ -error: aborting due to 8 previous errors +error: aborting due to 9 previous errors