From 6640df94f9abd4f9fef0263afbf7978ac55832b8 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 11 Jul 2023 19:30:53 -0400 Subject: [PATCH] ValueTracking: Remove CannotBeOrderedLessThanZero Replace the last user of CannotBeOrderedLessThanZero with new version. Makes assumes work in this case. --- .../AggressiveInstCombine.cpp | 13 ++++++++----- .../Transforms/AggressiveInstCombine/X86/sqrt.ll | 16 +++++++++++++++- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 952043cefe244..503ce019dc843 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -400,7 +400,8 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) { /// pessimistic codegen that has to account for setting errno and can enable /// vectorization. static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI, - TargetLibraryInfo &TLI) { + TargetLibraryInfo &TLI, AssumptionCache &AC, + DominatorTree &DT) { // Match a call to sqrt mathlib function. auto *Call = dyn_cast<CallInst>(&I); if (!Call) @@ -424,7 +425,8 @@ static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI, Value *Arg = Call->getArgOperand(0); if (TTI.haveFastSqrt(Ty) && (Call->hasNoNaNs() || - CannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI))) { + cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, &I, + &DT))) { IRBuilder<> Builder(&I); IRBuilderBase::FastMathFlagGuard Guard(Builder); Builder.setFastMathFlags(Call->getFastMathFlags()); @@ -918,7 +920,8 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) { /// occur frequently and/or have more than a constant-length pattern match. 
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, - TargetLibraryInfo &TLI, AliasAnalysis &AA) { + TargetLibraryInfo &TLI, AliasAnalysis &AA, + AssumptionCache &AC) { bool MadeChange = false; for (BasicBlock &BB : F) { // Ignore unreachable basic blocks. @@ -943,7 +946,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, // NOTE: This function introduces erasing of the instruction `I`, so it // needs to be called at the end of this sequence, otherwise we may make // bugs. - MadeChange |= foldSqrt(I, TTI, TLI); + MadeChange |= foldSqrt(I, TTI, TLI, AC, DT); } } @@ -964,7 +967,7 @@ static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI, const DataLayout &DL = F.getParent()->getDataLayout(); TruncInstCombine TIC(AC, TLI, DL, DT); MadeChange |= TIC.run(F); - MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA); + MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC); return MadeChange; } diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/sqrt.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/sqrt.ll index 665f6de96d932..d54e53a21bbf7 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/X86/sqrt.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/sqrt.ll @@ -5,6 +5,7 @@ declare float @sqrtf(float) declare double @sqrt(double) declare fp128 @sqrtl(fp128) declare float @llvm.fabs.f32(float) +declare void @llvm.assume(i1 noundef) ; "nnan" implies no setting of errno and the target can lower this to an ; instruction, so transform to an intrinsic. 
@@ -46,7 +47,7 @@ define fp128 @sqrt_call_nnan_f128(fp128 %x) { define float @sqrt_call_nnan_f32_nobuiltin(float %x) { ; CHECK-LABEL: @sqrt_call_nnan_f32_nobuiltin( -; CHECK-NEXT: [[SQRT:%.*]] = call nnan float @sqrtf(float [[X:%.*]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: [[SQRT:%.*]] = call nnan float @sqrtf(float [[X:%.*]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: ret float [[SQRT]] ; %sqrt = call nnan float @sqrtf(float %x) nobuiltin @@ -74,3 +75,16 @@ define float @sqrt_call_f32_fabs(float %x) { %sqrt = call float @sqrtf(float %a) ret float %sqrt } + +define float @sqrt_call_f32_assume_oge_n0(float %x) { +; CHECK-LABEL: @sqrt_call_f32_assume_oge_n0( +; CHECK-NEXT: [[IS_POS:%.*]] = fcmp oge float [[X:%.*]], -0.000000e+00 +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_POS]]) +; CHECK-NEXT: [[SQRT1:%.*]] = call float @llvm.sqrt.f32(float [[X]]) +; CHECK-NEXT: ret float [[SQRT1]] +; + %is.pos = fcmp oge float %x, -0.0 + call void @llvm.assume(i1 %is.pos) + %sqrt = call float @sqrtf(float %x) + ret float %sqrt +}