-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Move ExpandMemCmp and MergeIcmp to the middle end #77370
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-transforms Author: Gabriel Baraldi (gbaraldi) ChangesThis should allow for optimizations like saving repeated loads between memcmp calls. c.f https://godbolt.org/z/bEna4Md9r, the window on the right shows the generated output of this branch. One question is where to put this in the pipeline. The inline expansions probably benefit the most from the passes we run early, but we might want to wait a bit so that we can try and prove a constant argument to the memcmp (though I imagine if they are going to be constant, it's directly from the source). One other question is that some of the x86 tests are massive, I just ported over from the Patch is 4.02 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77370.diff 83 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
index a7cbb0910baabf..556304231b397b 100644
--- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -25,7 +25,6 @@
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/CallBrPrepare.h"
#include "llvm/CodeGen/DwarfEHPrepare.h"
-#include "llvm/CodeGen/ExpandMemCmp.h"
#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/IndirectBrExpand.h"
@@ -629,15 +628,6 @@ void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const {
addPass(PrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
}
- if (getOptLevel() != CodeGenOptLevel::None) {
- // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
- // loads and compares. ExpandMemCmpPass then tries to expand those calls
- // into optimally-sized loads and compares. The transforms are enabled by a
- // target lowering hook.
- if (!Opt.DisableMergeICmps)
- addPass(MergeICmpsPass());
- addPass(ExpandMemCmpPass(&TM));
- }
// Run GC lowering passes for builtin collectors
// TODO: add a pass insertion point here
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
index f950dfae7e338b..3c00668aae3897 100644
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -47,7 +47,6 @@ FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass, (TM))
FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))
FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, (TM))
FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass, (TM))
-FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass, (TM))
FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, (TM))
@@ -55,7 +54,6 @@ FUNCTION_PASS("interleaved-access", InterleavedAccessPass, (TM))
FUNCTION_PASS("interleaved-load-combine", InterleavedLoadCombinePass, (TM))
FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ())
FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())
-FUNCTION_PASS("mergeicmps", MergeICmpsPass, ())
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))
FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ())
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index ca9fbb1def7624..e5ed5f15f62ed7 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -519,8 +519,6 @@ namespace llvm {
// Expands large div/rem instructions.
FunctionPass *createExpandLargeFpConvertPass();
- // This pass expands memcmp() to load/stores.
- FunctionPass *createExpandMemCmpLegacyPass();
/// Creates Break False Dependencies pass. \see BreakFalseDeps.cpp
FunctionPass *createBreakFalseDeps();
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 46b1e95c3c15f3..b0ca9fa942cda3 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -103,7 +103,6 @@ void initializeEdgeBundlesPass(PassRegistry&);
void initializeEHContGuardCatchretPass(PassRegistry &);
void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry&);
void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
-void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
void initializeExpandVectorPredicationPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 7a21876e565a7c..9aff428fbe938b 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -119,7 +119,6 @@ namespace {
(void) llvm::createPostDomTree();
(void) llvm::createMergeICmpsLegacyPass();
(void) llvm::createExpandLargeDivRemPass();
- (void)llvm::createExpandMemCmpLegacyPass();
(void) llvm::createExpandVectorPredicationPass();
std::string buf;
llvm::raw_string_ostream os(buf);
diff --git a/llvm/include/llvm/CodeGen/ExpandMemCmp.h b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
similarity index 83%
rename from llvm/include/llvm/CodeGen/ExpandMemCmp.h
rename to llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
index 94a877854f327a..94ba0cf9305040 100644
--- a/llvm/include/llvm/CodeGen/ExpandMemCmp.h
+++ b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_EXPANDMEMCMP_H
-#define LLVM_CODEGEN_EXPANDMEMCMP_H
+#ifndef LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H
+#define LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H
#include "llvm/IR/PassManager.h"
@@ -26,4 +26,4 @@ class ExpandMemCmpPass : public PassInfoMixin<ExpandMemCmpPass> {
} // namespace llvm
-#endif // LLVM_CODEGEN_EXPANDMEMCMP_H
+#endif // LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index df2d1831ee5fdb..518432e9a7b32f 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -71,7 +71,6 @@ add_llvm_component_library(LLVMCodeGen
ExecutionDomainFix.cpp
ExpandLargeDivRem.cpp
ExpandLargeFpConvert.cpp
- ExpandMemCmp.cpp
ExpandPostRAPseudos.cpp
ExpandReductions.cpp
ExpandVectorPredication.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 7b73a7b11ddf1c..043fa4e6eabe8f 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -41,7 +41,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyTailDuplicatePass(Registry);
initializeExpandLargeDivRemLegacyPassPass(Registry);
initializeExpandLargeFpConvertLegacyPassPass(Registry);
- initializeExpandMemCmpLegacyPassPass(Registry);
initializeExpandPostRAPass(Registry);
initializeFEntryInserterPass(Registry);
initializeFinalizeISelPass(Registry);
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 4003a08a5422dd..33562e90e94426 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -108,9 +108,6 @@ static cl::opt<bool> EnableImplicitNullChecks(
"enable-implicit-null-checks",
cl::desc("Fold null checks into faulting memory operations"),
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableMergeICmps("disable-mergeicmps",
- cl::desc("Disable MergeICmps Pass"),
- cl::init(false), cl::Hidden);
static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
cl::desc("Print LLVM IR produced by the loop-reduce pass"));
static cl::opt<bool>
@@ -487,7 +484,6 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() {
SET_BOOLEAN_OPTION(EnableImplicitNullChecks)
SET_BOOLEAN_OPTION(EnableMachineOutliner)
SET_BOOLEAN_OPTION(MISchedPostRA)
- SET_BOOLEAN_OPTION(DisableMergeICmps)
SET_BOOLEAN_OPTION(DisableLSR)
SET_BOOLEAN_OPTION(DisableConstantHoisting)
SET_BOOLEAN_OPTION(DisableCGP)
@@ -872,13 +868,6 @@ void TargetPassConfig::addIRPasses() {
"\n\n*** Code after LSR ***\n"));
}
- // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
- // loads and compares. ExpandMemCmpPass then tries to expand those calls
- // into optimally-sized loads and compares. The transforms are enabled by a
- // target lowering hook.
- if (!DisableMergeICmps)
- addPass(createMergeICmpsLegacyPass());
- addPass(createExpandMemCmpLegacyPass());
}
// Run GC lowering passes for builtin collectors
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 439f749bda8bb7..20448554756aca 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -76,7 +76,6 @@
#include "llvm/CodeGen/DwarfEHPrepare.h"
#include "llvm/CodeGen/ExpandLargeDivRem.h"
#include "llvm/CodeGen/ExpandLargeFpConvert.h"
-#include "llvm/CodeGen/ExpandMemCmp.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/CodeGen/IndirectBrExpand.h"
@@ -181,6 +180,7 @@
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/Transforms/Scalar/GVN.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 5c6c391049a7b2..e2dd413f12d696 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -86,6 +86,7 @@
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
@@ -111,6 +112,7 @@
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
+#include "llvm/Transforms/Scalar/MergeICmps.h"
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/Transforms/Scalar/NewGVN.h"
#include "llvm/Transforms/Scalar/Reassociate.h"
@@ -386,6 +388,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
if (AreStatisticsEnabled())
FPM.addPass(CountVisitsPass());
+ FPM.addPass(MergeICmpsPass());
+ FPM.addPass(ExpandMemCmpPass(TM));
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -532,6 +536,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
if (AreStatisticsEnabled())
FPM.addPass(CountVisitsPass());
+ FPM.addPass(MergeICmpsPass());
+ FPM.addPass(ExpandMemCmpPass(TM));
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 82ce040c649626..31adbf1942b410 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -353,6 +353,7 @@ FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
FUNCTION_PASS("memprof", MemProfilerPass())
FUNCTION_PASS("mergeicmps", MergeICmpsPass())
+FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM))
FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
@@ -415,7 +416,7 @@ FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
-FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
+FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
FUNCTION_PASS("tsan", ThreadSanitizerPass())
FUNCTION_PASS("typepromotion", TypePromotionPass(TM))
FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass())
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 2dd27037a17de7..f6e666dd071256 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_component_library(LLVMScalarOpts
DeadStoreElimination.cpp
DFAJumpThreading.cpp
DivRemPairs.cpp
+ ExpandMemCmp.cpp
EarlyCSE.cpp
FlattenCFGPass.cpp
Float2Int.cpp
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
similarity index 90%
rename from llvm/lib/CodeGen/ExpandMemCmp.cpp
rename to llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
index bb84813569f4d5..973875ee142978 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
@@ -11,21 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/ExpandMemCmp.h"
+#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -35,9 +36,6 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-namespace llvm {
-class TargetLowering;
-}
#define DEBUG_TYPE "expand-memcmp"
@@ -305,6 +303,7 @@ unsigned MemCmpExpansion::getNumBlocks() {
}
void MemCmpExpansion::createLoadCmpBlocks() {
+ assert(ResBlock.BB && "ResBlock must be created before LoadCmpBlocks");
for (unsigned i = 0; i < getNumBlocks(); i++) {
BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
EndBlock->getParent(), EndBlock);
@@ -313,6 +312,7 @@ void MemCmpExpansion::createLoadCmpBlocks() {
}
void MemCmpExpansion::createResultBlock() {
+ assert(EndBlock && "EndBlock must be created before ResultBlock");
ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
EndBlock->getParent(), EndBlock);
}
@@ -828,9 +828,9 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
/// ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
- const TargetLowering *TLI, const DataLayout *DL,
- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
- DomTreeUpdater *DTU, const bool IsBCmp) {
+ const DataLayout *DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DomTreeUpdater *DTU,
+ const bool IsBCmp) {
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
@@ -845,9 +845,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
}
const uint64_t SizeVal = SizeCast->getZExtValue();
- if (SizeVal == 0) {
- return false;
- }
+
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp =
@@ -857,28 +855,33 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
auto Options = TTI->enableMemCmpExpansion(OptForSize,
IsUsedForZeroCmp);
if (!Options) return false;
+ Value *Res = nullptr;
- if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
- Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
-
- if (OptForSize &&
- MaxLoadsPerMemcmpOptSize.getNumOccurrences())
- Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
+ if (SizeVal == 0) {
+ Res = ConstantInt::get(CI->getFunctionType()->getReturnType(), 0);
+ } else {
+ if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
+ Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
- if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
- Options.MaxNumLoads = MaxLoadsPerMemcmp;
+ if (OptForSize &&
+ MaxLoadsPerMemcmpOptSize.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
- MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
+ if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmp;
- // Don't expand if this will require more loads than desired by the target.
- if (Expansion.getNumLoads() == 0) {
- NumMemCmpGreaterThanMax++;
- return false;
- }
+ MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
- NumMemCmpInlined++;
+ // Don't expand if this will require more loads than desired by the target.
+ if (Expansion.getNumLoads() == 0) {
+ NumMemCmpGreaterThanMax++;
+ return false;
+ }
- if (Value *Res = Expansion.getMemCmpExpansion()) {
+ NumMemCmpInlined++;
+ Res = Expansion.getMemCmpExpansion();
+ }
+ if (Res) {
// Replace call with result of expansion and erase call.
CI->replaceAllUsesWith(Res);
CI->eraseFromParent();
@@ -889,62 +892,18 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
// Returns true if a change was made.
static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const TargetTransformInfo *TTI,
const DataLayout &DL, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const TargetLowering *TL,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DominatorTree *DT);
-class ExpandMemCmpLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- ExpandMemCmpLegacyPass() : FunctionPass(ID) {
- initializeExpandMemCmpLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F)) return false;
-
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC) {
- return false;
- }
- const TargetLowering* TL =
- TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
-
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- const TargetTransformInfo *TTI =
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto *BFI = (PSI && PSI->hasProfileSummary()) ?
- &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
- nullptr;
- DominatorTree *DT = nullptr;
- if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DT = &DTWP->getDomTree();
- auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
- return !PA.areAllPreserved();
- }
-
-private:
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
- FunctionPass::getAnalysisUsage(AU);
- }
-};
bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const TargetTransformInfo *TTI,
const DataLayout &DL, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) {
for (Instruction &I : BB) {
@@ -955,7 +914,7 @@ bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
LibFunc Func;
if (TLI->getLibFunc(*CI, Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
- expandMemCmp(CI, TTI, TL, &DL, PS...
[truncated]
|
@llvm/pr-subscribers-backend-x86 Author: Gabriel Baraldi (gbaraldi) ChangesThis should allow for optimizations like saving repeated loads between memcmp calls. c.f https://godbolt.org/z/bEna4Md9r, the window on the right shows the generated output of this branch. One question is where to put this in the pipeline. The inline expansions probably benefit the most from the passes we run early, but we might want to wait a bit so that we can try and prove a constant argument to the memcmp (though I imagine if they are going to be constant, it's directly from the source). One other question is that some of the x86 tests are massive, I just ported over from the Patch is 4.02 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77370.diff 83 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
index a7cbb0910baabf..556304231b397b 100644
--- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -25,7 +25,6 @@
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/CallBrPrepare.h"
#include "llvm/CodeGen/DwarfEHPrepare.h"
-#include "llvm/CodeGen/ExpandMemCmp.h"
#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/IndirectBrExpand.h"
@@ -629,15 +628,6 @@ void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const {
addPass(PrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
}
- if (getOptLevel() != CodeGenOptLevel::None) {
- // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
- // loads and compares. ExpandMemCmpPass then tries to expand those calls
- // into optimally-sized loads and compares. The transforms are enabled by a
- // target lowering hook.
- if (!Opt.DisableMergeICmps)
- addPass(MergeICmpsPass());
- addPass(ExpandMemCmpPass(&TM));
- }
// Run GC lowering passes for builtin collectors
// TODO: add a pass insertion point here
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
index f950dfae7e338b..3c00668aae3897 100644
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -47,7 +47,6 @@ FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass, (TM))
FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))
FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, (TM))
FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass, (TM))
-FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass, (TM))
FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, (TM))
@@ -55,7 +54,6 @@ FUNCTION_PASS("interleaved-access", InterleavedAccessPass, (TM))
FUNCTION_PASS("interleaved-load-combine", InterleavedLoadCombinePass, (TM))
FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ())
FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())
-FUNCTION_PASS("mergeicmps", MergeICmpsPass, ())
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))
FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ())
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index ca9fbb1def7624..e5ed5f15f62ed7 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -519,8 +519,6 @@ namespace llvm {
// Expands large div/rem instructions.
FunctionPass *createExpandLargeFpConvertPass();
- // This pass expands memcmp() to load/stores.
- FunctionPass *createExpandMemCmpLegacyPass();
/// Creates Break False Dependencies pass. \see BreakFalseDeps.cpp
FunctionPass *createBreakFalseDeps();
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 46b1e95c3c15f3..b0ca9fa942cda3 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -103,7 +103,6 @@ void initializeEdgeBundlesPass(PassRegistry&);
void initializeEHContGuardCatchretPass(PassRegistry &);
void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry&);
void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
-void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
void initializeExpandVectorPredicationPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 7a21876e565a7c..9aff428fbe938b 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -119,7 +119,6 @@ namespace {
(void) llvm::createPostDomTree();
(void) llvm::createMergeICmpsLegacyPass();
(void) llvm::createExpandLargeDivRemPass();
- (void)llvm::createExpandMemCmpLegacyPass();
(void) llvm::createExpandVectorPredicationPass();
std::string buf;
llvm::raw_string_ostream os(buf);
diff --git a/llvm/include/llvm/CodeGen/ExpandMemCmp.h b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
similarity index 83%
rename from llvm/include/llvm/CodeGen/ExpandMemCmp.h
rename to llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
index 94a877854f327a..94ba0cf9305040 100644
--- a/llvm/include/llvm/CodeGen/ExpandMemCmp.h
+++ b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_EXPANDMEMCMP_H
-#define LLVM_CODEGEN_EXPANDMEMCMP_H
+#ifndef LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H
+#define LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H
#include "llvm/IR/PassManager.h"
@@ -26,4 +26,4 @@ class ExpandMemCmpPass : public PassInfoMixin<ExpandMemCmpPass> {
} // namespace llvm
-#endif // LLVM_CODEGEN_EXPANDMEMCMP_H
+#endif // LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index df2d1831ee5fdb..518432e9a7b32f 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -71,7 +71,6 @@ add_llvm_component_library(LLVMCodeGen
ExecutionDomainFix.cpp
ExpandLargeDivRem.cpp
ExpandLargeFpConvert.cpp
- ExpandMemCmp.cpp
ExpandPostRAPseudos.cpp
ExpandReductions.cpp
ExpandVectorPredication.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 7b73a7b11ddf1c..043fa4e6eabe8f 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -41,7 +41,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyTailDuplicatePass(Registry);
initializeExpandLargeDivRemLegacyPassPass(Registry);
initializeExpandLargeFpConvertLegacyPassPass(Registry);
- initializeExpandMemCmpLegacyPassPass(Registry);
initializeExpandPostRAPass(Registry);
initializeFEntryInserterPass(Registry);
initializeFinalizeISelPass(Registry);
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 4003a08a5422dd..33562e90e94426 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -108,9 +108,6 @@ static cl::opt<bool> EnableImplicitNullChecks(
"enable-implicit-null-checks",
cl::desc("Fold null checks into faulting memory operations"),
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableMergeICmps("disable-mergeicmps",
- cl::desc("Disable MergeICmps Pass"),
- cl::init(false), cl::Hidden);
static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
cl::desc("Print LLVM IR produced by the loop-reduce pass"));
static cl::opt<bool>
@@ -487,7 +484,6 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() {
SET_BOOLEAN_OPTION(EnableImplicitNullChecks)
SET_BOOLEAN_OPTION(EnableMachineOutliner)
SET_BOOLEAN_OPTION(MISchedPostRA)
- SET_BOOLEAN_OPTION(DisableMergeICmps)
SET_BOOLEAN_OPTION(DisableLSR)
SET_BOOLEAN_OPTION(DisableConstantHoisting)
SET_BOOLEAN_OPTION(DisableCGP)
@@ -872,13 +868,6 @@ void TargetPassConfig::addIRPasses() {
"\n\n*** Code after LSR ***\n"));
}
- // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
- // loads and compares. ExpandMemCmpPass then tries to expand those calls
- // into optimally-sized loads and compares. The transforms are enabled by a
- // target lowering hook.
- if (!DisableMergeICmps)
- addPass(createMergeICmpsLegacyPass());
- addPass(createExpandMemCmpLegacyPass());
}
// Run GC lowering passes for builtin collectors
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 439f749bda8bb7..20448554756aca 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -76,7 +76,6 @@
#include "llvm/CodeGen/DwarfEHPrepare.h"
#include "llvm/CodeGen/ExpandLargeDivRem.h"
#include "llvm/CodeGen/ExpandLargeFpConvert.h"
-#include "llvm/CodeGen/ExpandMemCmp.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/CodeGen/IndirectBrExpand.h"
@@ -181,6 +180,7 @@
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/Transforms/Scalar/GVN.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 5c6c391049a7b2..e2dd413f12d696 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -86,6 +86,7 @@
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
@@ -111,6 +112,7 @@
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
+#include "llvm/Transforms/Scalar/MergeICmps.h"
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/Transforms/Scalar/NewGVN.h"
#include "llvm/Transforms/Scalar/Reassociate.h"
@@ -386,6 +388,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
if (AreStatisticsEnabled())
FPM.addPass(CountVisitsPass());
+ FPM.addPass(MergeICmpsPass());
+ FPM.addPass(ExpandMemCmpPass(TM));
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -532,6 +536,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
if (AreStatisticsEnabled())
FPM.addPass(CountVisitsPass());
+ FPM.addPass(MergeICmpsPass());
+ FPM.addPass(ExpandMemCmpPass(TM));
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 82ce040c649626..31adbf1942b410 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -353,6 +353,7 @@ FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
FUNCTION_PASS("memprof", MemProfilerPass())
FUNCTION_PASS("mergeicmps", MergeICmpsPass())
+FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM))
FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
@@ -415,7 +416,7 @@ FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
-FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
+FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
FUNCTION_PASS("tsan", ThreadSanitizerPass())
FUNCTION_PASS("typepromotion", TypePromotionPass(TM))
FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass())
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 2dd27037a17de7..f6e666dd071256 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_component_library(LLVMScalarOpts
DeadStoreElimination.cpp
DFAJumpThreading.cpp
DivRemPairs.cpp
+ ExpandMemCmp.cpp
EarlyCSE.cpp
FlattenCFGPass.cpp
Float2Int.cpp
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
similarity index 90%
rename from llvm/lib/CodeGen/ExpandMemCmp.cpp
rename to llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
index bb84813569f4d5..973875ee142978 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
@@ -11,21 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/ExpandMemCmp.h"
+#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -35,9 +36,6 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-namespace llvm {
-class TargetLowering;
-}
#define DEBUG_TYPE "expand-memcmp"
@@ -305,6 +303,7 @@ unsigned MemCmpExpansion::getNumBlocks() {
}
void MemCmpExpansion::createLoadCmpBlocks() {
+ assert(ResBlock.BB && "ResBlock must be created before LoadCmpBlocks");
for (unsigned i = 0; i < getNumBlocks(); i++) {
BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
EndBlock->getParent(), EndBlock);
@@ -313,6 +312,7 @@ void MemCmpExpansion::createLoadCmpBlocks() {
}
void MemCmpExpansion::createResultBlock() {
+ assert(EndBlock && "EndBlock must be created before ResultBlock");
ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
EndBlock->getParent(), EndBlock);
}
@@ -828,9 +828,9 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
/// ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
- const TargetLowering *TLI, const DataLayout *DL,
- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
- DomTreeUpdater *DTU, const bool IsBCmp) {
+ const DataLayout *DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DomTreeUpdater *DTU,
+ const bool IsBCmp) {
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
@@ -845,9 +845,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
}
const uint64_t SizeVal = SizeCast->getZExtValue();
- if (SizeVal == 0) {
- return false;
- }
+
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp =
@@ -857,28 +855,33 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
auto Options = TTI->enableMemCmpExpansion(OptForSize,
IsUsedForZeroCmp);
if (!Options) return false;
+ Value *Res = nullptr;
- if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
- Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
-
- if (OptForSize &&
- MaxLoadsPerMemcmpOptSize.getNumOccurrences())
- Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
+ if (SizeVal == 0) {
+ Res = ConstantInt::get(CI->getFunctionType()->getReturnType(), 0);
+ } else {
+ if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
+ Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
- if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
- Options.MaxNumLoads = MaxLoadsPerMemcmp;
+ if (OptForSize &&
+ MaxLoadsPerMemcmpOptSize.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
- MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
+ if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmp;
- // Don't expand if this will require more loads than desired by the target.
- if (Expansion.getNumLoads() == 0) {
- NumMemCmpGreaterThanMax++;
- return false;
- }
+ MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
- NumMemCmpInlined++;
+ // Don't expand if this will require more loads than desired by the target.
+ if (Expansion.getNumLoads() == 0) {
+ NumMemCmpGreaterThanMax++;
+ return false;
+ }
- if (Value *Res = Expansion.getMemCmpExpansion()) {
+ NumMemCmpInlined++;
+ Res = Expansion.getMemCmpExpansion();
+ }
+ if (Res) {
// Replace call with result of expansion and erase call.
CI->replaceAllUsesWith(Res);
CI->eraseFromParent();
@@ -889,62 +892,18 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
// Returns true if a change was made.
static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const TargetTransformInfo *TTI,
const DataLayout &DL, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const TargetLowering *TL,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DominatorTree *DT);
-class ExpandMemCmpLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- ExpandMemCmpLegacyPass() : FunctionPass(ID) {
- initializeExpandMemCmpLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F)) return false;
-
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC) {
- return false;
- }
- const TargetLowering* TL =
- TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
-
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- const TargetTransformInfo *TTI =
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto *BFI = (PSI && PSI->hasProfileSummary()) ?
- &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
- nullptr;
- DominatorTree *DT = nullptr;
- if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DT = &DTWP->getDomTree();
- auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
- return !PA.areAllPreserved();
- }
-
-private:
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
- FunctionPass::getAnalysisUsage(AU);
- }
-};
bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const TargetTransformInfo *TTI,
const DataLayout &DL, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) {
for (Instruction &I : BB) {
@@ -955,7 +914,7 @@ bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
LibFunc Func;
if (TLI->getLibFunc(*CI, Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
- expandMemCmp(CI, TTI, TL, &DL, PS...
[truncated]
|
@llvm/pr-subscribers-backend-m68k Author: Gabriel Baraldi (gbaraldi) ChangesThis should allow for optimizations like saving repeated loads between memcmp calls. c.f https://godbolt.org/z/bEna4Md9r, the window on the right shows the generated output of this branch. One question is where to put this in the pipeline. The inline expansions probably benefit the most from the passes we run early, but we might want to wait a bit so that we can try and prove a constant argument to the memcmp (though I imagine if they are going to be constant, it's directly from the source). One other question is that some of the x86 tests are massive, I just ported over from the Patch is 4.02 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77370.diff 83 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
index a7cbb0910baabf..556304231b397b 100644
--- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -25,7 +25,6 @@
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/CallBrPrepare.h"
#include "llvm/CodeGen/DwarfEHPrepare.h"
-#include "llvm/CodeGen/ExpandMemCmp.h"
#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/IndirectBrExpand.h"
@@ -629,15 +628,6 @@ void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const {
addPass(PrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
}
- if (getOptLevel() != CodeGenOptLevel::None) {
- // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
- // loads and compares. ExpandMemCmpPass then tries to expand those calls
- // into optimally-sized loads and compares. The transforms are enabled by a
- // target lowering hook.
- if (!Opt.DisableMergeICmps)
- addPass(MergeICmpsPass());
- addPass(ExpandMemCmpPass(&TM));
- }
// Run GC lowering passes for builtin collectors
// TODO: add a pass insertion point here
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
index f950dfae7e338b..3c00668aae3897 100644
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -47,7 +47,6 @@ FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass, (TM))
FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))
FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, (TM))
FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass, (TM))
-FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass, (TM))
FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, (TM))
@@ -55,7 +54,6 @@ FUNCTION_PASS("interleaved-access", InterleavedAccessPass, (TM))
FUNCTION_PASS("interleaved-load-combine", InterleavedLoadCombinePass, (TM))
FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ())
FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())
-FUNCTION_PASS("mergeicmps", MergeICmpsPass, ())
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))
FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ())
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index ca9fbb1def7624..e5ed5f15f62ed7 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -519,8 +519,6 @@ namespace llvm {
// Expands large div/rem instructions.
FunctionPass *createExpandLargeFpConvertPass();
- // This pass expands memcmp() to load/stores.
- FunctionPass *createExpandMemCmpLegacyPass();
/// Creates Break False Dependencies pass. \see BreakFalseDeps.cpp
FunctionPass *createBreakFalseDeps();
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 46b1e95c3c15f3..b0ca9fa942cda3 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -103,7 +103,6 @@ void initializeEdgeBundlesPass(PassRegistry&);
void initializeEHContGuardCatchretPass(PassRegistry &);
void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry&);
void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
-void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
void initializeExpandVectorPredicationPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 7a21876e565a7c..9aff428fbe938b 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -119,7 +119,6 @@ namespace {
(void) llvm::createPostDomTree();
(void) llvm::createMergeICmpsLegacyPass();
(void) llvm::createExpandLargeDivRemPass();
- (void)llvm::createExpandMemCmpLegacyPass();
(void) llvm::createExpandVectorPredicationPass();
std::string buf;
llvm::raw_string_ostream os(buf);
diff --git a/llvm/include/llvm/CodeGen/ExpandMemCmp.h b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
similarity index 83%
rename from llvm/include/llvm/CodeGen/ExpandMemCmp.h
rename to llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
index 94a877854f327a..94ba0cf9305040 100644
--- a/llvm/include/llvm/CodeGen/ExpandMemCmp.h
+++ b/llvm/include/llvm/Transforms/Scalar/ExpandMemCmp.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_EXPANDMEMCMP_H
-#define LLVM_CODEGEN_EXPANDMEMCMP_H
+#ifndef LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H
+#define LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H
#include "llvm/IR/PassManager.h"
@@ -26,4 +26,4 @@ class ExpandMemCmpPass : public PassInfoMixin<ExpandMemCmpPass> {
} // namespace llvm
-#endif // LLVM_CODEGEN_EXPANDMEMCMP_H
+#endif // LLVM_TRANSFORMS_SCALAR_EXPANDMEMCMP_H
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index df2d1831ee5fdb..518432e9a7b32f 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -71,7 +71,6 @@ add_llvm_component_library(LLVMCodeGen
ExecutionDomainFix.cpp
ExpandLargeDivRem.cpp
ExpandLargeFpConvert.cpp
- ExpandMemCmp.cpp
ExpandPostRAPseudos.cpp
ExpandReductions.cpp
ExpandVectorPredication.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 7b73a7b11ddf1c..043fa4e6eabe8f 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -41,7 +41,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyTailDuplicatePass(Registry);
initializeExpandLargeDivRemLegacyPassPass(Registry);
initializeExpandLargeFpConvertLegacyPassPass(Registry);
- initializeExpandMemCmpLegacyPassPass(Registry);
initializeExpandPostRAPass(Registry);
initializeFEntryInserterPass(Registry);
initializeFinalizeISelPass(Registry);
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 4003a08a5422dd..33562e90e94426 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -108,9 +108,6 @@ static cl::opt<bool> EnableImplicitNullChecks(
"enable-implicit-null-checks",
cl::desc("Fold null checks into faulting memory operations"),
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableMergeICmps("disable-mergeicmps",
- cl::desc("Disable MergeICmps Pass"),
- cl::init(false), cl::Hidden);
static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
cl::desc("Print LLVM IR produced by the loop-reduce pass"));
static cl::opt<bool>
@@ -487,7 +484,6 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() {
SET_BOOLEAN_OPTION(EnableImplicitNullChecks)
SET_BOOLEAN_OPTION(EnableMachineOutliner)
SET_BOOLEAN_OPTION(MISchedPostRA)
- SET_BOOLEAN_OPTION(DisableMergeICmps)
SET_BOOLEAN_OPTION(DisableLSR)
SET_BOOLEAN_OPTION(DisableConstantHoisting)
SET_BOOLEAN_OPTION(DisableCGP)
@@ -872,13 +868,6 @@ void TargetPassConfig::addIRPasses() {
"\n\n*** Code after LSR ***\n"));
}
- // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
- // loads and compares. ExpandMemCmpPass then tries to expand those calls
- // into optimally-sized loads and compares. The transforms are enabled by a
- // target lowering hook.
- if (!DisableMergeICmps)
- addPass(createMergeICmpsLegacyPass());
- addPass(createExpandMemCmpLegacyPass());
}
// Run GC lowering passes for builtin collectors
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 439f749bda8bb7..20448554756aca 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -76,7 +76,6 @@
#include "llvm/CodeGen/DwarfEHPrepare.h"
#include "llvm/CodeGen/ExpandLargeDivRem.h"
#include "llvm/CodeGen/ExpandLargeFpConvert.h"
-#include "llvm/CodeGen/ExpandMemCmp.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/CodeGen/IndirectBrExpand.h"
@@ -181,6 +180,7 @@
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/Transforms/Scalar/GVN.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 5c6c391049a7b2..e2dd413f12d696 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -86,6 +86,7 @@
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
@@ -111,6 +112,7 @@
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
+#include "llvm/Transforms/Scalar/MergeICmps.h"
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/Transforms/Scalar/NewGVN.h"
#include "llvm/Transforms/Scalar/Reassociate.h"
@@ -386,6 +388,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
if (AreStatisticsEnabled())
FPM.addPass(CountVisitsPass());
+ FPM.addPass(MergeICmpsPass());
+ FPM.addPass(ExpandMemCmpPass(TM));
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -532,6 +536,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
if (AreStatisticsEnabled())
FPM.addPass(CountVisitsPass());
+ FPM.addPass(MergeICmpsPass());
+ FPM.addPass(ExpandMemCmpPass(TM));
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 82ce040c649626..31adbf1942b410 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -353,6 +353,7 @@ FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
FUNCTION_PASS("memprof", MemProfilerPass())
FUNCTION_PASS("mergeicmps", MergeICmpsPass())
+FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM))
FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
@@ -415,7 +416,7 @@ FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
-FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
+FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
FUNCTION_PASS("tsan", ThreadSanitizerPass())
FUNCTION_PASS("typepromotion", TypePromotionPass(TM))
FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass())
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 2dd27037a17de7..f6e666dd071256 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_component_library(LLVMScalarOpts
DeadStoreElimination.cpp
DFAJumpThreading.cpp
DivRemPairs.cpp
+ ExpandMemCmp.cpp
EarlyCSE.cpp
FlattenCFGPass.cpp
Float2Int.cpp
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
similarity index 90%
rename from llvm/lib/CodeGen/ExpandMemCmp.cpp
rename to llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
index bb84813569f4d5..973875ee142978 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/Transforms/Scalar/ExpandMemCmp.cpp
@@ -11,21 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/ExpandMemCmp.h"
+#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -35,9 +36,6 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-namespace llvm {
-class TargetLowering;
-}
#define DEBUG_TYPE "expand-memcmp"
@@ -305,6 +303,7 @@ unsigned MemCmpExpansion::getNumBlocks() {
}
void MemCmpExpansion::createLoadCmpBlocks() {
+ assert(ResBlock.BB && "ResBlock must be created before LoadCmpBlocks");
for (unsigned i = 0; i < getNumBlocks(); i++) {
BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
EndBlock->getParent(), EndBlock);
@@ -313,6 +312,7 @@ void MemCmpExpansion::createLoadCmpBlocks() {
}
void MemCmpExpansion::createResultBlock() {
+ assert(EndBlock && "EndBlock must be created before ResultBlock");
ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
EndBlock->getParent(), EndBlock);
}
@@ -828,9 +828,9 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
/// ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
- const TargetLowering *TLI, const DataLayout *DL,
- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
- DomTreeUpdater *DTU, const bool IsBCmp) {
+ const DataLayout *DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, DomTreeUpdater *DTU,
+ const bool IsBCmp) {
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
@@ -845,9 +845,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
}
const uint64_t SizeVal = SizeCast->getZExtValue();
- if (SizeVal == 0) {
- return false;
- }
+
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp =
@@ -857,28 +855,33 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
auto Options = TTI->enableMemCmpExpansion(OptForSize,
IsUsedForZeroCmp);
if (!Options) return false;
+ Value *Res = nullptr;
- if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
- Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
-
- if (OptForSize &&
- MaxLoadsPerMemcmpOptSize.getNumOccurrences())
- Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
+ if (SizeVal == 0) {
+ Res = ConstantInt::get(CI->getFunctionType()->getReturnType(), 0);
+ } else {
+ if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
+ Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
- if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
- Options.MaxNumLoads = MaxLoadsPerMemcmp;
+ if (OptForSize &&
+ MaxLoadsPerMemcmpOptSize.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
- MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
+ if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmp;
- // Don't expand if this will require more loads than desired by the target.
- if (Expansion.getNumLoads() == 0) {
- NumMemCmpGreaterThanMax++;
- return false;
- }
+ MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
- NumMemCmpInlined++;
+ // Don't expand if this will require more loads than desired by the target.
+ if (Expansion.getNumLoads() == 0) {
+ NumMemCmpGreaterThanMax++;
+ return false;
+ }
- if (Value *Res = Expansion.getMemCmpExpansion()) {
+ NumMemCmpInlined++;
+ Res = Expansion.getMemCmpExpansion();
+ }
+ if (Res) {
// Replace call with result of expansion and erase call.
CI->replaceAllUsesWith(Res);
CI->eraseFromParent();
@@ -889,62 +892,18 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
// Returns true if a change was made.
static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const TargetTransformInfo *TTI,
const DataLayout &DL, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const TargetLowering *TL,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DominatorTree *DT);
-class ExpandMemCmpLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- ExpandMemCmpLegacyPass() : FunctionPass(ID) {
- initializeExpandMemCmpLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F)) return false;
-
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC) {
- return false;
- }
- const TargetLowering* TL =
- TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
-
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- const TargetTransformInfo *TTI =
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto *BFI = (PSI && PSI->hasProfileSummary()) ?
- &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
- nullptr;
- DominatorTree *DT = nullptr;
- if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DT = &DTWP->getDomTree();
- auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
- return !PA.areAllPreserved();
- }
-
-private:
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
- FunctionPass::getAnalysisUsage(AU);
- }
-};
bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, const TargetLowering *TL,
+ const TargetTransformInfo *TTI,
const DataLayout &DL, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) {
for (Instruction &I : BB) {
@@ -955,7 +914,7 @@ bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
LibFunc Func;
if (TLI->getLibFunc(*CI, Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
- expandMemCmp(CI, TTI, TL, &DL, PS...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
From the previous review:
Have you addressed this somehow? |
So what confused me about that concern was that we don't care about this for memcpy for example, where if instcombine decides to do the inline expansion, it just happens, asan/msan notwidthstanding. But if not legal we can probably use the |
I don't really know what the issue was, exactly. The review doesn't say, and I don't have any further information. We may want to disable memcmp expansion when msan is enabled, to allow the specialized diagnostics to trigger. I am a little concerned about the semantics of uninitialized memory... currently, we assume that memcmp stops comparing after the first non-equal byte. And the current expansion doesn't try to freeze the loaded values. (See LibCallSimplifier::optimizeStrCmp .) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm surprised by the volume of test changes
I think I remember that one of these issues was that sanitizers would insert instrumentation around calls to When There was also the issue mentionned by +emerson:
|
I just copied/ported all the tests we had that tested memcmp expansion, which apparently was quite a bit. Though I guess there’s now quite a bit of repetition. |
What is the "recommended" way of running the benchmarks, I ran |
Notably there are regressions in stage2-O3 (but not stage1-O3), which usually indicates an optimization quality regression that affects clang itself. |
I'd probably recommend to start conservatively and add the passes at the end of the module optimization pipeline, before the last SimplifyCFG invocation (in the vicinity of ExpandDivRem). This won't address your motivating case, but it will allow us to iron out issues from moving these passes to the middle-end, while still mostly keeping their current pipeline position. Once they are in the middle-end pipeline, it will be easier to evaluate different positions. |
@nikic could you rerun the compile time benchmarks? |
@gbaraldi New compile-time numbers: https://llvm-compile-time-tracker.com/compare.php?from=ecd47811b755d13357085bcd7519a66d6c4d8e5c&to=f582b874c35e2b90046da8e8c30a7fa30ba08167&stat=instructions:u (They are mildly positive now, and there are some text size reductions.) |
That's encouraging at least. |
What is needed for merging this? (Do the compiler-rt/sanitizer tests run in CI)? Apparently they used to be an issue with this change. |
… optimization of the inline expansions
150e6b0
to
58d3c12
Compare
Ok, it seems CI is finally a bit happier, not sure what was going on. @nikic what do you think is needed to get this in? |
Gentle bump |
bump @nikic :) |
Bump! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This needs a rebase. I think this looks basically fine, but I am somewhat concerned about the test coverage. The ExpandMemCmp pass produces IR instructions that are supposed to produce a good lowering on a given target, and what this patch does is convert these to plain IR tests -- that matches our usual testing model, but I also think that it makes it very hard to verify that the produced asm will be good and won't regress based on changes in the backend.
For that reason, I think it would make sense to switch some of the tests (like AArch64/memcmp.extra.ll) to use something like opt -S -passes=expand-memcmp | llc
.
if not "BPF" in config.root.targets: | ||
config.unsupported = True | ||
if "system-aix" in config.available_features: | ||
config.unsupported = True |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why this system-aix check?
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 | ||
; RUN: opt -S -passes=expand-memcmp -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s | ||
|
||
; This tests interaction between MergeICmp and expand-memcmp. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it? This test looks broken to me.
; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, ptr [[GEP1]] | ||
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, ptr [[GEP2]] | ||
; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] | ||
; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Leftover check lines?
@@ -0,0 +1,2 @@ | |||
if not 'PowerPC' in config.root.targets: | |||
config.unsupported = True |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't appear to be used?
This should allow for optimizations like saving repeated loads between memcmp calls. c.f https://godbolt.org/z/bEna4Md9r, the window on the right shows the generated output of this branch.
One question is where to put this in the pipeline. The inline expansions probably benefit the most from the passes we run early, but we might want to wait a bit so that we can try and prove a constant argument to the memcmp (though I imagine if they are going to be constant, it's directly from the source).
One other question is that some of the x86 tests are massive, I just ported over from the
llc
test, but they grew quite a bit more.This builds on work done by @legrosbuffle in https://reviews.llvm.org/D60318