diff --git a/llvm/include/llvm/CodeGen/GlobalMerge.h b/llvm/include/llvm/CodeGen/GlobalMerge.h index 13ad67d4544bc..1577bcf8903f5 100644 --- a/llvm/include/llvm/CodeGen/GlobalMerge.h +++ b/llvm/include/llvm/CodeGen/GlobalMerge.h @@ -28,6 +28,8 @@ struct GlobalMergeOptions { bool MergeConst = false; /// Whether we should merge global variables that have external linkage. bool MergeExternal = true; + /// Whether we should merge constant global variables. + bool MergeConstantGlobals = false; /// Whether we should try to optimize for size only. /// Currently, this applies a dead simple heuristic: only consider globals /// used in minsize functions for merging. diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 20273d069bf05..c7c2178571215 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -476,7 +476,8 @@ namespace llvm { /// Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, bool OnlyOptimizeForSize = false, - bool MergeExternalByDefault = false); + bool MergeExternalByDefault = false, + bool MergeConstantByDefault = false); /// This pass splits the stack into a safe stack and an unsafe stack to /// protect against stack-based overflow vulnerabilities. diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp index 8aa4345cfd6df..c31ba6b31ad9a 100644 --- a/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/llvm/lib/CodeGen/GlobalMerge.cpp @@ -196,11 +196,13 @@ class GlobalMerge : public FunctionPass { } explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset, - bool OnlyOptimizeForSize, bool MergeExternalGlobals) + bool OnlyOptimizeForSize, bool MergeExternalGlobals, + bool MergeConstantGlobals) : FunctionPass(ID), TM(TM) { Opt.MaxOffset = MaximalOffset; Opt.SizeOnly = OnlyOptimizeForSize; Opt.MergeExternal = MergeExternalGlobals; + Opt.MergeConstantGlobals = MergeConstantGlobals; initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -475,7 +477,8 @@ bool GlobalMergeImpl::doMerge(const SmallVectorImpl &Globals, auto &DL = M.getDataLayout(); LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #" - << GlobalSet.find_first() << "\n"); + << GlobalSet.find_first() << ", total of " << Globals.size() + << "\n"); bool Changed = false; ssize_t i = GlobalSet.find_first(); @@ -551,6 +554,8 @@ bool GlobalMergeImpl::doMerge(const SmallVectorImpl &Globals, MergedGV->setAlignment(MaxAlign); MergedGV->setSection(Globals[i]->getSection()); + LLVM_DEBUG(dbgs() << "MergedGV: " << *MergedGV << "\n"); + const StructLayout *MergedLayout = DL.getStructLayout(MergedTy); for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) { GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); @@ -700,6 +705,11 @@ bool GlobalMergeImpl::run(Module &M) { else Globals[{AddressSpace, Section}].push_back(&GV); } + LLVM_DEBUG(dbgs() << "GV " + << ((DL.getTypeAllocSize(Ty) < Opt.MaxOffset) + ? "to merge: " + : "not to merge: ") + << GV << "\n"); } for (auto &P : Globals) @@ -710,7 +720,7 @@ bool GlobalMergeImpl::run(Module &M) { if (P.second.size() > 1) Changed |= doMerge(P.second, M, false, P.first.first); - if (EnableGlobalMergeOnConst) + if (Opt.MergeConstantGlobals) for (auto &P : ConstGlobals) if (P.second.size() > 1) Changed |= doMerge(P.second, M, true, P.first.first); @@ -720,8 +730,11 @@ bool GlobalMergeImpl::run(Module &M) { Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset, bool OnlyOptimizeForSize, - bool MergeExternalByDefault) { + bool MergeExternalByDefault, + bool MergeConstantByDefault) { bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ? MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE); - return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal); + bool MergeConstant = EnableGlobalMergeOnConst || MergeConstantByDefault; + return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal, + MergeConstant); } diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 1ef891d1b677a..763b6edb1c09f 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -111,6 +111,15 @@ static cl::opt EnablePPCGenScalarMASSEntries( "(scalar) entries"), cl::Hidden); +static cl::opt + EnableGlobalMerge("ppc-global-merge", cl::Hidden, cl::init(false), + cl::desc("Enable the global merge pass")); + +static cl::opt + GlobalMergeMaxOffset("ppc-global-merge-max-offset", cl::Hidden, + cl::init(0x7fff), + cl::desc("Maximum global merge offset")); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine A(getThePPC32Target()); @@ -491,6 +500,10 @@ void PPCPassConfig::addIRPasses() { } bool PPCPassConfig::addPreISel() { + if (EnableGlobalMerge) + addPass( + createGlobalMergePass(TM, GlobalMergeMaxOffset, false, false, true)); + if (MergeStringPool && getOptLevel() != CodeGenOptLevel::None) addPass(createPPCMergeStringPoolPass()); diff --git a/llvm/test/CodeGen/PowerPC/merge-private.ll b/llvm/test/CodeGen/PowerPC/merge-private.ll index 6cf276990d7ea..6ed2d6dfc542b 100644 --- a/llvm/test/CodeGen/PowerPC/merge-private.ll +++ b/llvm/test/CodeGen/PowerPC/merge-private.ll @@ -1,15 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 \ -; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \ ; RUN: --check-prefix=AIX64 ; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 \ -; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \ ; RUN: --check-prefix=AIX32 ; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux -mcpu=pwr8 \ -; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \ ; RUN: --check-prefix=LINUX64LE ; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux -mcpu=pwr8 \ -; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \ +; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \ ; RUN: --check-prefix=LINUX64BE @.str = private unnamed_addr constant [15 x i8] c"Private global\00", align 1 @@ -24,7 +24,7 @@ define dso_local void @print_func() { ; AIX64-NEXT: stdu r1, -128(r1) ; AIX64-NEXT: std r0, 144(r1) ; AIX64-NEXT: std r31, 120(r1) # 8-byte Folded Spill -; AIX64-NEXT: ld r31, L..C0(r2) # @__ModuleStringPool +; AIX64-NEXT: ld r31, L..C0(r2) # @_MergedGlobals ; AIX64-NEXT: mr r3, r31 ; AIX64-NEXT: bl .puts[PR] ; AIX64-NEXT: nop @@ -43,7 +43,7 @@ define dso_local void @print_func() { ; AIX32-NEXT: stwu r1, -64(r1) ; AIX32-NEXT: stw r0, 72(r1) ; AIX32-NEXT: stw r31, 60(r1) # 4-byte Folded Spill -; AIX32-NEXT: lwz r31, L..C0(r2) # @__ModuleStringPool +; AIX32-NEXT: lwz r31, L..C0(r2) # @_MergedGlobals ; AIX32-NEXT: mr r3, r31 ; AIX32-NEXT: bl .puts[PR] ; AIX32-NEXT: nop @@ -64,9 +64,9 @@ define dso_local void @print_func() { ; LINUX64LE-NEXT: .cfi_offset r30, -16 ; LINUX64LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; LINUX64LE-NEXT: stdu r1, -48(r1) -; LINUX64LE-NEXT: addis r3, r2, .L__ModuleStringPool@toc@ha +; LINUX64LE-NEXT: addis r3, r2, .L_MergedGlobals@toc@ha ; LINUX64LE-NEXT: std r0, 64(r1) -; LINUX64LE-NEXT: addi r30, r3, .L__ModuleStringPool@toc@l +; LINUX64LE-NEXT: addi r30, r3, .L_MergedGlobals@toc@l ; LINUX64LE-NEXT: mr r3, r30 ; LINUX64LE-NEXT: bl puts ; LINUX64LE-NEXT: nop @@ -87,9 +87,9 @@ define dso_local void @print_func() { ; LINUX64BE-NEXT: .cfi_def_cfa_offset 128 ; LINUX64BE-NEXT: .cfi_offset lr, 16 ; LINUX64BE-NEXT: .cfi_offset r30, -16 -; LINUX64BE-NEXT: addis r3, r2, .L__ModuleStringPool@toc@ha +; LINUX64BE-NEXT: addis r3, r2, .L_MergedGlobals@toc@ha ; LINUX64BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill -; LINUX64BE-NEXT: addi r30, r3, .L__ModuleStringPool@toc@l +; LINUX64BE-NEXT: addi r30, r3, .L_MergedGlobals@toc@l ; LINUX64BE-NEXT: mr r3, r30 ; LINUX64BE-NEXT: bl puts ; LINUX64BE-NEXT: nop