From 4fbe33593c8132fdc48647c06f4d1455bfff1c88 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 8 Oct 2022 11:09:42 -0700 Subject: [PATCH] [LTO] Make local linkage GlobalValue in non-prevailing COMDAT available_externally See the updated linkonce_resolution_comdat.ll. For a local linkage GV in a non-prevailing COMDAT, it remains defined while its leader has been made available_externally. This violates the COMDAT rule that its members must be retained or discarded as a unit. To fix this, update the regular LTO change D34803 to track local linkage GlobalValues, and port the code to ThinLTO (GlobalAliases are not handled.) Fix https://github.com/llvm/llvm-project/issues/58215: as a size optimization, we place private `__profd_` in a COMDAT with a `__profc_` key. When FuncImport.cpp makes `__profc_` available_externally due to a non-prevailing COMDAT, `__profd_` incorrectly remains private. This change makes the `__profd_` available_externally. ``` cat > c.h <<'eof' extern void bar(); inline __attribute__((noinline)) void foo() {} eof cat > m1.cc <<'eof' #include "c.h" int main() { bar(); foo(); } eof cat > m2.cc <<'eof' #include "c.h" __attribute__((noinline)) void bar() { foo(); } eof clang -O2 -fprofile-generate=./t m1.cc m2.cc -flto -fuse-ld=lld -o t_gen rm -fr t && ./t_gen && llvm-profdata show -function=foo t/default_*.profraw # one _Z3foov clang -O2 -fprofile-generate=./t m1.cc m2.cc -flto=thin -fuse-ld=lld -o t_gen rm -fr t && ./t_gen && llvm-profdata show -function=foo t/default_*.profraw # one _Z3foov ``` Reviewed By: tejohnson Differential Revision: https://reviews.llvm.org/D135427 --- llvm/lib/LTO/LTO.cpp | 10 ++--- llvm/lib/Transforms/IPO/FunctionImport.cpp | 18 +++++++- .../LTO/Resolution/X86/comdat-mixed-lto.ll | 10 +++-- .../X86/Inputs/linkonce_resolution_comdat.ll | 17 ++++++-- .../ThinLTO/X86/linkonce_resolution_comdat.ll | 41 ++++++++++++++----- 5 files changed, 74 insertions(+), 22 deletions(-) diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 10ca98f0f87d9e..0ce8519faa0c1e 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -696,11 +696,11 @@ handleNonPrevailingComdat(GlobalValue &GV, if (!NonPrevailingComdats.count(C)) return; - // Additionally need to drop externally visible global values from the comdat - // to available_externally, so that there aren't multiply defined linker - // errors. - if (!GV.hasLocalLinkage()) - GV.setLinkage(GlobalValue::AvailableExternallyLinkage); + // Additionally need to drop all global values from the comdat to + // available_externally, to satisfy the COMDAT requirement that all members + // are discarded as a unit. The non-local linkage global values avoid + // duplicate definition linker errors. + GV.setLinkage(GlobalValue::AvailableExternallyLinkage); if (auto GO = dyn_cast(&GV)) GO->setComdat(nullptr); diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index b589ec798caa10..ca14df7e841095 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1051,6 +1051,7 @@ bool llvm::convertToDeclaration(GlobalValue &GV) { void llvm::thinLTOFinalizeInModule(Module &TheModule, const GVSummaryMapTy &DefinedGlobals, bool PropagateAttrs) { + DenseSet NonPrevailingComdats; auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) { // See if the global summary analysis computed a new resolved linkage. const auto &GS = DefinedGlobals.find(GV.getGUID()); @@ -1128,8 +1129,10 @@ void llvm::thinLTOFinalizeInModule(Module &TheModule, // as this is a declaration for the linker, and will be dropped eventually. // It is illegal for comdats to contain declarations. auto *GO = dyn_cast_or_null(&GV); - if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) + if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) { + NonPrevailingComdats.insert(GO->getComdat()); GO->setComdat(nullptr); + } }; // Process functions and global now @@ -1139,6 +1142,19 @@ void llvm::thinLTOFinalizeInModule(Module &TheModule, FinalizeInModule(GV); for (auto &GV : TheModule.aliases()) FinalizeInModule(GV); + + // For a non-prevailing comdat, all its members must be available_externally. + // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle + // local linkage GlobalValues. + if (NonPrevailingComdats.empty()) + return; + for (auto &GO : TheModule.global_objects()) { + if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) { + GO.setComdat(nullptr); + assert(GO.hasLocalLinkage() && "GO's comdat should have been dropped"); + GO.setLinkage(GlobalValue::AvailableExternallyLinkage); + } + } } /// Run internalization on \p TheModule based on symmary analysis. diff --git a/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll b/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll index d3730f4e9bcda1..1adaf6f973154d 100644 --- a/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll +++ b/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll @@ -8,7 +8,7 @@ ; The copy of C from this module is prevailing. The copy of C from the ; regular LTO module is not prevailing, and will be dropped to ; available_externally. -; RUN: llvm-lto2 run -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t2.o,testglobfunc,lxp -r=%t1.o,testglobfunc,lx -o %t3 %t1.o %t2.o -save-temps +; RUN: llvm-lto2 run -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t1.o,testglobfunc,lxp -r=%t2.o,testglobfunc,lx -o %t3 %t1.o %t2.o -save-temps ; The Input module (regular LTO) is %t3.0. Check to make sure that we removed ; __cxx_global_var_init and testglobfunc from comdat. Also check to ensure @@ -16,8 +16,12 @@ ; have linker multiply defined errors as it is no longer in a comdat and ; would clash with the copy from this module. ; RUN: llvm-dis %t3.0.0.preopt.bc -o - | FileCheck %s -; CHECK: define internal void @__cxx_global_var_init() section ".text.startup" { -; CHECK: define available_externally dso_local void @testglobfunc() section ".text.startup" { + +; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init, ptr @C }] +; CHECK: @C = available_externally dso_local global %"class.Test::ptr" zeroinitializer, align 4 +; CHECK-NOT: declare +; CHECK: declare dso_local void @__cxx_global_var_init() section ".text.startup" +; CHECK-NOT: declare ; ModuleID = 'comdat-mixed-lto.o' source_filename = "comdat-mixed-lto.cpp" diff --git a/llvm/test/ThinLTO/X86/Inputs/linkonce_resolution_comdat.ll b/llvm/test/ThinLTO/X86/Inputs/linkonce_resolution_comdat.ll index 92b5182315943e..f5b3130fd15202 100644 --- a/llvm/test/ThinLTO/X86/Inputs/linkonce_resolution_comdat.ll +++ b/llvm/test/ThinLTO/X86/Inputs/linkonce_resolution_comdat.ll @@ -1,13 +1,24 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -$c2 = comdat any +$f = comdat any +$g = comdat any -define linkonce_odr i32 @f(i8*) unnamed_addr comdat($c2) { +@g_private = private global i32 41, comdat($g) + +define linkonce_odr i32 @f(i8*) unnamed_addr comdat($f) { + ret i32 41 +} + +define linkonce_odr i32 @g() unnamed_addr comdat($g) { ret i32 41 } -define i32 @g() { +define internal void @g_internal() unnamed_addr comdat($g) { + ret void +} + +define i32 @h() { %i = call i32 @f(i8* null) ret i32 %i } diff --git a/llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll b/llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll index 7b22180132e6a1..2fb226046ea9f5 100644 --- a/llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll +++ b/llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll @@ -1,33 +1,54 @@ -; This test ensures that we drop the preempted copy of @f from %t2.bc from its -; comdat after making it available_externally. If not we would get a -; verification error. +; This test ensures that we drop the preempted copy of @f/@g from %t2.bc from their +; comdats after making it available_externally. If not we would get a +; verification error. g_internal/g_private are changed to available_externally +; as well since it is in the same comdat of g. ; RUN: opt -module-summary %s -o %t1.bc ; RUN: opt -module-summary %p/Inputs/linkonce_resolution_comdat.ll -o %t2.bc -; RUN: llvm-lto -thinlto-action=run -disable-thinlto-funcattrs=0 %t1.bc %t2.bc -exported-symbol=f -exported-symbol=g -thinlto-save-temps=%t3. +; RUN: llvm-lto -thinlto-action=run -disable-thinlto-funcattrs=0 %t1.bc %t2.bc -exported-symbol=f -exported-symbol=g -exported-symbol=h -thinlto-save-temps=%t3. ; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s --check-prefix=IMPORT1 ; RUN: llvm-dis %t3.1.3.imported.bc -o - | FileCheck %s --check-prefix=IMPORT2 ; Copy from first module is prevailing and converted to weak_odr, copy ; from second module is preempted and converted to available_externally and ; removed from comdat. -; IMPORT1: define weak_odr i32 @f(i8* %0) unnamed_addr [[ATTR:#[0-9]+]] comdat($c1) { +; IMPORT1: @g_private = private global i32 43, comdat($g) +; IMPORT1: define weak_odr i32 @f(i8* %0) unnamed_addr [[ATTR:#[0-9]+]] comdat { +; IMPORT1: define weak_odr i32 @g() unnamed_addr [[ATTR]] comdat { +; IMPORT1: define internal void @g_internal() unnamed_addr comdat($g) { + +; IMPORT2: @g_private = available_externally dso_local global i32 41{{$}} ; IMPORT2: define available_externally i32 @f(i8* %0) unnamed_addr [[ATTR:#[0-9]+]] { +; IMPORT2: define available_externally i32 @g() unnamed_addr [[ATTR]] { +; IMPORT2: define available_externally dso_local void @g_internal() unnamed_addr { ; CHECK-DAG: attributes [[ATTR]] = { norecurse nounwind } -; RUN: llvm-nm -o - < %t1.bc.thinlto.o | FileCheck %s --check-prefix=NM1 +; RUN: llvm-nm %t1.bc.thinlto.o | FileCheck %s --check-prefix=NM1 ; NM1: W f +; NM1: W g -; RUN: llvm-nm -o - < %t2.bc.thinlto.o | FileCheck %s --check-prefix=NM2 +; RUN: llvm-nm %t2.bc.thinlto.o | FileCheck %s --check-prefix=NM2 ; f() would have been turned into available_externally since it is preempted, -; and inlined into g() +; and inlined into h() ; NM2-NOT: f +; NM2-NOT: g target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -$c1 = comdat any +$f = comdat any +$g = comdat any + +@g_private = private global i32 43, comdat($g) -define linkonce_odr i32 @f(i8*) unnamed_addr comdat($c1) { +define linkonce_odr i32 @f(i8*) unnamed_addr comdat { ret i32 43 } + +define linkonce_odr i32 @g() unnamed_addr comdat { + ret i32 43 +} + +define internal void @g_internal() unnamed_addr comdat($g) { + ret void +}