diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h index 11bfd733a8854..cc4929a1ff8da 100644 --- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h +++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h @@ -414,6 +414,178 @@ inline DenormalMode GetNVVMDenormMode(bool ShouldFTZ) { return DenormalMode::getIEEE(); } +inline bool FAddShouldFTZ(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_add_rm_ftz_f: + case Intrinsic::nvvm_add_rn_ftz_f: + case Intrinsic::nvvm_add_rp_ftz_f: + case Intrinsic::nvvm_add_rz_ftz_f: + return true; + + case Intrinsic::nvvm_add_rm_f: + case Intrinsic::nvvm_add_rn_f: + case Intrinsic::nvvm_add_rp_f: + case Intrinsic::nvvm_add_rz_f: + case Intrinsic::nvvm_add_rm_d: + case Intrinsic::nvvm_add_rn_d: + case Intrinsic::nvvm_add_rp_d: + case Intrinsic::nvvm_add_rz_d: + return false; + } + llvm_unreachable("Checking FTZ flag for invalid NVVM add intrinsic"); +} + +inline APFloat::roundingMode GetFAddRoundingMode(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_add_rm_f: + case Intrinsic::nvvm_add_rm_d: + case Intrinsic::nvvm_add_rm_ftz_f: + return APFloat::rmTowardNegative; + case Intrinsic::nvvm_add_rn_f: + case Intrinsic::nvvm_add_rn_d: + case Intrinsic::nvvm_add_rn_ftz_f: + return APFloat::rmNearestTiesToEven; + case Intrinsic::nvvm_add_rp_f: + case Intrinsic::nvvm_add_rp_d: + case Intrinsic::nvvm_add_rp_ftz_f: + return APFloat::rmTowardPositive; + case Intrinsic::nvvm_add_rz_f: + case Intrinsic::nvvm_add_rz_d: + case Intrinsic::nvvm_add_rz_ftz_f: + return APFloat::rmTowardZero; + } + llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM add"); +} + +inline bool FMulShouldFTZ(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_mul_rm_ftz_f: + case Intrinsic::nvvm_mul_rn_ftz_f: + case Intrinsic::nvvm_mul_rp_ftz_f: + case Intrinsic::nvvm_mul_rz_ftz_f: + return true; + + case Intrinsic::nvvm_mul_rm_f: + case 
Intrinsic::nvvm_mul_rn_f: + case Intrinsic::nvvm_mul_rp_f: + case Intrinsic::nvvm_mul_rz_f: + case Intrinsic::nvvm_mul_rm_d: + case Intrinsic::nvvm_mul_rn_d: + case Intrinsic::nvvm_mul_rp_d: + case Intrinsic::nvvm_mul_rz_d: + return false; + } + llvm_unreachable("Checking FTZ flag for invalid NVVM mul intrinsic"); +} + +inline APFloat::roundingMode GetFMulRoundingMode(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_mul_rm_f: + case Intrinsic::nvvm_mul_rm_d: + case Intrinsic::nvvm_mul_rm_ftz_f: + return APFloat::rmTowardNegative; + case Intrinsic::nvvm_mul_rn_f: + case Intrinsic::nvvm_mul_rn_d: + case Intrinsic::nvvm_mul_rn_ftz_f: + return APFloat::rmNearestTiesToEven; + case Intrinsic::nvvm_mul_rp_f: + case Intrinsic::nvvm_mul_rp_d: + case Intrinsic::nvvm_mul_rp_ftz_f: + return APFloat::rmTowardPositive; + case Intrinsic::nvvm_mul_rz_f: + case Intrinsic::nvvm_mul_rz_d: + case Intrinsic::nvvm_mul_rz_ftz_f: + return APFloat::rmTowardZero; + } + llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM mul"); +} + +inline bool FDivShouldFTZ(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_div_rm_ftz_f: + case Intrinsic::nvvm_div_rn_ftz_f: + case Intrinsic::nvvm_div_rp_ftz_f: + case Intrinsic::nvvm_div_rz_ftz_f: + return true; + + case Intrinsic::nvvm_div_rm_f: + case Intrinsic::nvvm_div_rn_f: + case Intrinsic::nvvm_div_rp_f: + case Intrinsic::nvvm_div_rz_f: + case Intrinsic::nvvm_div_rm_d: + case Intrinsic::nvvm_div_rn_d: + case Intrinsic::nvvm_div_rp_d: + case Intrinsic::nvvm_div_rz_d: + return false; + } + llvm_unreachable("Checking FTZ flag for invalid NVVM div intrinsic"); +} + +inline APFloat::roundingMode GetFDivRoundingMode(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_div_rm_f: + case Intrinsic::nvvm_div_rm_d: + case Intrinsic::nvvm_div_rm_ftz_f: + return APFloat::rmTowardNegative; + case Intrinsic::nvvm_div_rn_f: + case Intrinsic::nvvm_div_rn_d: + case 
Intrinsic::nvvm_div_rn_ftz_f: + return APFloat::rmNearestTiesToEven; + case Intrinsic::nvvm_div_rp_f: + case Intrinsic::nvvm_div_rp_d: + case Intrinsic::nvvm_div_rp_ftz_f: + return APFloat::rmTowardPositive; + case Intrinsic::nvvm_div_rz_f: + case Intrinsic::nvvm_div_rz_d: + case Intrinsic::nvvm_div_rz_ftz_f: + return APFloat::rmTowardZero; + } + llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM div"); +} + +inline bool FMAShouldFTZ(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_fma_rm_ftz_f: + case Intrinsic::nvvm_fma_rn_ftz_f: + case Intrinsic::nvvm_fma_rp_ftz_f: + case Intrinsic::nvvm_fma_rz_ftz_f: + return true; + + case Intrinsic::nvvm_fma_rm_f: + case Intrinsic::nvvm_fma_rn_f: + case Intrinsic::nvvm_fma_rp_f: + case Intrinsic::nvvm_fma_rz_f: + case Intrinsic::nvvm_fma_rm_d: + case Intrinsic::nvvm_fma_rn_d: + case Intrinsic::nvvm_fma_rp_d: + case Intrinsic::nvvm_fma_rz_d: + return false; + } + llvm_unreachable("Checking FTZ flag for invalid NVVM fma intrinsic"); +} + +inline APFloat::roundingMode GetFMARoundingMode(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_fma_rm_f: + case Intrinsic::nvvm_fma_rm_d: + case Intrinsic::nvvm_fma_rm_ftz_f: + return APFloat::rmTowardNegative; + case Intrinsic::nvvm_fma_rn_f: + case Intrinsic::nvvm_fma_rn_d: + case Intrinsic::nvvm_fma_rn_ftz_f: + return APFloat::rmNearestTiesToEven; + case Intrinsic::nvvm_fma_rp_f: + case Intrinsic::nvvm_fma_rp_d: + case Intrinsic::nvvm_fma_rp_ftz_f: + return APFloat::rmTowardPositive; + case Intrinsic::nvvm_fma_rz_f: + case Intrinsic::nvvm_fma_rz_d: + case Intrinsic::nvvm_fma_rz_ftz_f: + return APFloat::rmTowardZero; + } + llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM fma"); +} + } // namespace nvvm } // namespace llvm #endif // LLVM_IR_NVVMINTRINSICUTILS_H diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index dd98b62baca33..69fdd4f2b3e71 100644 --- 
a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1843,6 +1843,62 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::nvvm_sqrt_rn_ftz_f: return !Call->isStrictFP(); + // NVVM add intrinsics with explicit rounding modes + case Intrinsic::nvvm_add_rm_d: + case Intrinsic::nvvm_add_rn_d: + case Intrinsic::nvvm_add_rp_d: + case Intrinsic::nvvm_add_rz_d: + case Intrinsic::nvvm_add_rm_f: + case Intrinsic::nvvm_add_rn_f: + case Intrinsic::nvvm_add_rp_f: + case Intrinsic::nvvm_add_rz_f: + case Intrinsic::nvvm_add_rm_ftz_f: + case Intrinsic::nvvm_add_rn_ftz_f: + case Intrinsic::nvvm_add_rp_ftz_f: + case Intrinsic::nvvm_add_rz_ftz_f: + + // NVVM div intrinsics with explicit rounding modes + case Intrinsic::nvvm_div_rm_d: + case Intrinsic::nvvm_div_rn_d: + case Intrinsic::nvvm_div_rp_d: + case Intrinsic::nvvm_div_rz_d: + case Intrinsic::nvvm_div_rm_f: + case Intrinsic::nvvm_div_rn_f: + case Intrinsic::nvvm_div_rp_f: + case Intrinsic::nvvm_div_rz_f: + case Intrinsic::nvvm_div_rm_ftz_f: + case Intrinsic::nvvm_div_rn_ftz_f: + case Intrinsic::nvvm_div_rp_ftz_f: + case Intrinsic::nvvm_div_rz_ftz_f: + + // NVVM mul intrinsics with explicit rounding modes + case Intrinsic::nvvm_mul_rm_d: + case Intrinsic::nvvm_mul_rn_d: + case Intrinsic::nvvm_mul_rp_d: + case Intrinsic::nvvm_mul_rz_d: + case Intrinsic::nvvm_mul_rm_f: + case Intrinsic::nvvm_mul_rn_f: + case Intrinsic::nvvm_mul_rp_f: + case Intrinsic::nvvm_mul_rz_f: + case Intrinsic::nvvm_mul_rm_ftz_f: + case Intrinsic::nvvm_mul_rn_ftz_f: + case Intrinsic::nvvm_mul_rp_ftz_f: + case Intrinsic::nvvm_mul_rz_ftz_f: + + // NVVM fma intrinsics with explicit rounding modes + case Intrinsic::nvvm_fma_rm_d: + case Intrinsic::nvvm_fma_rn_d: + case Intrinsic::nvvm_fma_rp_d: + case Intrinsic::nvvm_fma_rz_d: + case Intrinsic::nvvm_fma_rm_f: + case Intrinsic::nvvm_fma_rn_f: + case Intrinsic::nvvm_fma_rp_f: + case Intrinsic::nvvm_fma_rz_f: + case 
Intrinsic::nvvm_fma_rm_ftz_f: + case Intrinsic::nvvm_fma_rn_ftz_f: + case Intrinsic::nvvm_fma_rp_ftz_f: + case Intrinsic::nvvm_fma_rz_ftz_f: + // Sign operations are actually bitwise operations, they do not raise // exceptions even for SNANs. case Intrinsic::fabs: @@ -3318,6 +3374,96 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty, return ConstantFP::get(Ty->getContext(), Res); } + + case Intrinsic::nvvm_add_rm_f: + case Intrinsic::nvvm_add_rn_f: + case Intrinsic::nvvm_add_rp_f: + case Intrinsic::nvvm_add_rz_f: + case Intrinsic::nvvm_add_rm_d: + case Intrinsic::nvvm_add_rn_d: + case Intrinsic::nvvm_add_rp_d: + case Intrinsic::nvvm_add_rz_d: + case Intrinsic::nvvm_add_rm_ftz_f: + case Intrinsic::nvvm_add_rn_ftz_f: + case Intrinsic::nvvm_add_rp_ftz_f: + case Intrinsic::nvvm_add_rz_ftz_f: { + + bool IsFTZ = nvvm::FAddShouldFTZ(IntrinsicID); + APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V; + APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V; + + APFloat::roundingMode RoundMode = + nvvm::GetFAddRoundingMode(IntrinsicID); + + APFloat Res = A; + APFloat::opStatus Status = Res.add(B, RoundMode); + + if (!Res.isNaN() && + (Status == APFloat::opOK || Status == APFloat::opInexact)) { + Res = IsFTZ ? FTZPreserveSign(Res) : Res; + return ConstantFP::get(Ty->getContext(), Res); + } + return nullptr; + } + + case Intrinsic::nvvm_mul_rm_f: + case Intrinsic::nvvm_mul_rn_f: + case Intrinsic::nvvm_mul_rp_f: + case Intrinsic::nvvm_mul_rz_f: + case Intrinsic::nvvm_mul_rm_d: + case Intrinsic::nvvm_mul_rn_d: + case Intrinsic::nvvm_mul_rp_d: + case Intrinsic::nvvm_mul_rz_d: + case Intrinsic::nvvm_mul_rm_ftz_f: + case Intrinsic::nvvm_mul_rn_ftz_f: + case Intrinsic::nvvm_mul_rp_ftz_f: + case Intrinsic::nvvm_mul_rz_ftz_f: { + + bool IsFTZ = nvvm::FMulShouldFTZ(IntrinsicID); + APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V; + APFloat B = IsFTZ ? 
FTZPreserveSign(Op2V) : Op2V; + + APFloat::roundingMode RoundMode = + nvvm::GetFMulRoundingMode(IntrinsicID); + + APFloat Res = A; + APFloat::opStatus Status = Res.multiply(B, RoundMode); + + if (!Res.isNaN() && + (Status == APFloat::opOK || Status == APFloat::opInexact)) { + Res = IsFTZ ? FTZPreserveSign(Res) : Res; + return ConstantFP::get(Ty->getContext(), Res); + } + return nullptr; + } + + case Intrinsic::nvvm_div_rm_f: + case Intrinsic::nvvm_div_rn_f: + case Intrinsic::nvvm_div_rp_f: + case Intrinsic::nvvm_div_rz_f: + case Intrinsic::nvvm_div_rm_d: + case Intrinsic::nvvm_div_rn_d: + case Intrinsic::nvvm_div_rp_d: + case Intrinsic::nvvm_div_rz_d: + case Intrinsic::nvvm_div_rm_ftz_f: + case Intrinsic::nvvm_div_rn_ftz_f: + case Intrinsic::nvvm_div_rp_ftz_f: + case Intrinsic::nvvm_div_rz_ftz_f: { + bool IsFTZ = nvvm::FDivShouldFTZ(IntrinsicID); + APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V; + APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V; + APFloat::roundingMode RoundMode = + nvvm::GetFDivRoundingMode(IntrinsicID); + + APFloat Res = A; + APFloat::opStatus Status = Res.divide(B, RoundMode); + if (!Res.isNaN() && + (Status == APFloat::opOK || Status == APFloat::opInexact)) { + Res = IsFTZ ? 
FTZPreserveSign(Res) : Res; + return ConstantFP::get(Ty->getContext(), Res); + } + return nullptr; + } } if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) @@ -3729,6 +3875,38 @@ static Constant *ConstantFoldScalarCall3(StringRef Name, V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven); return ConstantFP::get(Ty->getContext(), V); } + + case Intrinsic::nvvm_fma_rm_f: + case Intrinsic::nvvm_fma_rn_f: + case Intrinsic::nvvm_fma_rp_f: + case Intrinsic::nvvm_fma_rz_f: + case Intrinsic::nvvm_fma_rm_d: + case Intrinsic::nvvm_fma_rn_d: + case Intrinsic::nvvm_fma_rp_d: + case Intrinsic::nvvm_fma_rz_d: + case Intrinsic::nvvm_fma_rm_ftz_f: + case Intrinsic::nvvm_fma_rn_ftz_f: + case Intrinsic::nvvm_fma_rp_ftz_f: + case Intrinsic::nvvm_fma_rz_ftz_f: { + bool IsFTZ = nvvm::FMAShouldFTZ(IntrinsicID); + APFloat A = IsFTZ ? FTZPreserveSign(C1) : C1; + APFloat B = IsFTZ ? FTZPreserveSign(C2) : C2; + APFloat C = IsFTZ ? FTZPreserveSign(C3) : C3; + + APFloat::roundingMode RoundMode = + nvvm::GetFMARoundingMode(IntrinsicID); + + APFloat Res = A; + APFloat::opStatus Status = Res.fusedMultiplyAdd(B, C, RoundMode); + + if (!Res.isNaN() && + (Status == APFloat::opOK || Status == APFloat::opInexact)) { + Res = IsFTZ ? 
FTZPreserveSign(Res) : Res; + return ConstantFP::get(Ty->getContext(), Res); + } + return nullptr; + } + case Intrinsic::amdgcn_cubeid: case Intrinsic::amdgcn_cubema: case Intrinsic::amdgcn_cubesc: diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-add.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-add.ll new file mode 100644 index 0000000000000..a3d87439d74cf --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-add.ll @@ -0,0 +1,876 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s + +; Check constant-folding for NVVM add intrinsics with different rounding modes + +;############################################################### +;# Add(1.25, -2.0) # +;############################################################### +; Tests addition of two normal numbers (1.25 and -2.0) where the result +; is exactly representable. All rounding modes produce the same result. 
+ +define double @test_1_25_minus_2_rm_d() { +; CHECK-LABEL: define double @test_1_25_minus_2_rm_d() { +; CHECK-NEXT: ret double -7.500000e-01 +; + %res = call double @llvm.nvvm.add.rm.d(double 1.25, double -2.0) + ret double %res +} + +define double @test_1_25_minus_2_rn_d() { +; CHECK-LABEL: define double @test_1_25_minus_2_rn_d() { +; CHECK-NEXT: ret double -7.500000e-01 +; + %res = call double @llvm.nvvm.add.rn.d(double 1.25, double -2.0) + ret double %res +} + +define double @test_1_25_minus_2_rp_d() { +; CHECK-LABEL: define double @test_1_25_minus_2_rp_d() { +; CHECK-NEXT: ret double -7.500000e-01 +; + %res = call double @llvm.nvvm.add.rp.d(double 1.25, double -2.0) + ret double %res +} + +define double @test_1_25_minus_2_rz_d() { +; CHECK-LABEL: define double @test_1_25_minus_2_rz_d() { +; CHECK-NEXT: ret double -7.500000e-01 +; + %res = call double @llvm.nvvm.add.rz.d(double 1.25, double -2.0) + ret double %res +} + +define float @test_1_25_minus_2_rm_f() { +; CHECK-LABEL: define float @test_1_25_minus_2_rm_f() { +; CHECK-NEXT: ret float -7.500000e-01 +; + %res = call float @llvm.nvvm.add.rm.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_1_25_minus_2_rn_f() { +; CHECK-LABEL: define float @test_1_25_minus_2_rn_f() { +; CHECK-NEXT: ret float -7.500000e-01 +; + %res = call float @llvm.nvvm.add.rn.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_1_25_minus_2_rp_f() { +; CHECK-LABEL: define float @test_1_25_minus_2_rp_f() { +; CHECK-NEXT: ret float -7.500000e-01 +; + %res = call float @llvm.nvvm.add.rp.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_1_25_minus_2_rz_f() { +; CHECK-LABEL: define float @test_1_25_minus_2_rz_f() { +; CHECK-NEXT: ret float -7.500000e-01 +; + %res = call float @llvm.nvvm.add.rz.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_1_25_minus_2_rm_ftz_f() { +; CHECK-LABEL: define float @test_1_25_minus_2_rm_ftz_f() { +; CHECK-NEXT: ret float -7.500000e-01 +; + 
%res = call float @llvm.nvvm.add.rm.ftz.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_1_25_minus_2_rn_ftz_f() { +; CHECK-LABEL: define float @test_1_25_minus_2_rn_ftz_f() { +; CHECK-NEXT: ret float -7.500000e-01 +; + %res = call float @llvm.nvvm.add.rn.ftz.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_1_25_minus_2_rp_ftz_f() { +; CHECK-LABEL: define float @test_1_25_minus_2_rp_ftz_f() { +; CHECK-NEXT: ret float -7.500000e-01 +; + %res = call float @llvm.nvvm.add.rp.ftz.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_1_25_minus_2_rz_ftz_f() { +; CHECK-LABEL: define float @test_1_25_minus_2_rz_ftz_f() { +; CHECK-NEXT: ret float -7.500000e-01 +; + %res = call float @llvm.nvvm.add.rz.ftz.f(float 1.25, float -2.0) + ret float %res +} + +;############################################################### +;# Add(0.0, NaN) # +;############################################################### +; Tests addition of a zero with NaN. +; The result is always NaN and the operation is not constant-folded. 
+ +define double @test_zero_plus_nan_rm_d() { +; CHECK-LABEL: define double @test_zero_plus_nan_rm_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.add.rm.d(double 0.000000e+00, double 0x7FF4444400000000) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.add.rm.d(double 0.0, double 0x7ff4444400000000) + ret double %res +} + +define double @test_zero_plus_nan_rn_d() { +; CHECK-LABEL: define double @test_zero_plus_nan_rn_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.add.rn.d(double 0.000000e+00, double 0x7FF4444400000000) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.add.rn.d(double 0.0, double 0x7ff4444400000000) + ret double %res +} + +define double @test_zero_plus_nan_rp_d() { +; CHECK-LABEL: define double @test_zero_plus_nan_rp_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.add.rp.d(double 0.000000e+00, double 0x7FF4444400000000) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.add.rp.d(double 0.0, double 0x7ff4444400000000) + ret double %res +} + +define double @test_zero_plus_nan_rz_d() { +; CHECK-LABEL: define double @test_zero_plus_nan_rz_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.add.rz.d(double 0.000000e+00, double 0x7FF4444400000000) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.add.rz.d(double 0.0, double 0x7ff4444400000000) + ret double %res +} + +define float @test_zero_plus_nan_rm_f() { +; CHECK-LABEL: define float @test_zero_plus_nan_rm_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.add.rm.f(float 0.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.add.rm.f(float 0.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_zero_plus_nan_rn_f() { +; CHECK-LABEL: define float @test_zero_plus_nan_rn_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.add.rn.f(float 0.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + 
%res = call float @llvm.nvvm.add.rn.f(float 0.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_zero_plus_nan_rp_f() { +; CHECK-LABEL: define float @test_zero_plus_nan_rp_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.add.rp.f(float 0.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.add.rp.f(float 0.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_zero_plus_nan_rz_f() { +; CHECK-LABEL: define float @test_zero_plus_nan_rz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.add.rz.f(float 0.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.add.rz.f(float 0.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_zero_plus_nan_rm_ftz_f() { +; CHECK-LABEL: define float @test_zero_plus_nan_rm_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.add.rm.ftz.f(float 0.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.add.rm.ftz.f(float 0.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_zero_plus_nan_rn_ftz_f() { +; CHECK-LABEL: define float @test_zero_plus_nan_rn_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.add.rn.ftz.f(float 0.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.add.rn.ftz.f(float 0.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_zero_plus_nan_rp_ftz_f() { +; CHECK-LABEL: define float @test_zero_plus_nan_rp_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.add.rp.ftz.f(float 0.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.add.rp.ftz.f(float 0.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_zero_plus_nan_rz_ftz_f() { +; CHECK-LABEL: define float @test_zero_plus_nan_rz_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call 
float @llvm.nvvm.add.rz.ftz.f(float 0.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.add.rz.ftz.f(float 0.0, float 0x7FFF444400000000) + ret float %res +} + +;############################################################### +;# Add(Subnormal, Subnormal) -> Normal # +;############################################################### +; Tests addition of two positive subnormal numbers (2^-127) +; - Without FTZ: The result is the sum of the subnormals (2^-126) - a normal number +; - With FTZ: The inputs are flushed to zero, so the result is zero (despite the output being normal) + +define double @test_subnorm_plus_subnorm_to_normal_rm_d() { +; CHECK-LABEL: define double @test_subnorm_plus_subnorm_to_normal_rm_d() { +; CHECK-NEXT: ret double 0x3810000000000000 +; + %res = call double @llvm.nvvm.add.rm.d(double 0x3800000000000000, double 0x3800000000000000) + ret double %res +} + +define double @test_subnorm_plus_subnorm_to_normal_rn_d() { +; CHECK-LABEL: define double @test_subnorm_plus_subnorm_to_normal_rn_d() { +; CHECK-NEXT: ret double 0x3810000000000000 +; + %res = call double @llvm.nvvm.add.rn.d(double 0x3800000000000000, double 0x3800000000000000) + ret double %res +} + +define double @test_subnorm_plus_subnorm_to_normal_rp_d() { +; CHECK-LABEL: define double @test_subnorm_plus_subnorm_to_normal_rp_d() { +; CHECK-NEXT: ret double 0x3810000000000000 +; + %res = call double @llvm.nvvm.add.rp.d(double 0x3800000000000000, double 0x3800000000000000) + ret double %res +} + +define double @test_subnorm_plus_subnorm_to_normal_rz_d() { +; CHECK-LABEL: define double @test_subnorm_plus_subnorm_to_normal_rz_d() { +; CHECK-NEXT: ret double 0x3810000000000000 +; + %res = call double @llvm.nvvm.add.rz.d(double 0x3800000000000000, double 0x3800000000000000) + ret double %res +} + +define float @test_subnorm_plus_subnorm_to_normal_rm_f() { +; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rm_f() { +; 
CHECK-NEXT: ret float 0x3810000000000000 +; + %res = call float @llvm.nvvm.add.rm.f(float 0x3800000000000000, float 0x3800000000000000) + ret float %res +} + +define float @test_subnorm_plus_subnorm_to_normal_rn_f() { +; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rn_f() { +; CHECK-NEXT: ret float 0x3810000000000000 +; + %res = call float @llvm.nvvm.add.rn.f(float 0x3800000000000000, float 0x3800000000000000) + ret float %res +} + +define float @test_subnorm_plus_subnorm_to_normal_rp_f() { +; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rp_f() { +; CHECK-NEXT: ret float 0x3810000000000000 +; + %res = call float @llvm.nvvm.add.rp.f(float 0x3800000000000000, float 0x3800000000000000) + ret float %res +} + +define float @test_subnorm_plus_subnorm_to_normal_rz_f() { +; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rz_f() { +; CHECK-NEXT: ret float 0x3810000000000000 +; + %res = call float @llvm.nvvm.add.rz.f(float 0x3800000000000000, float 0x3800000000000000) + ret float %res +} + +define float @test_subnorm_plus_subnorm_to_normal_rm_ftz_f() { +; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rm_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.add.rm.ftz.f(float 0x3800000000000000, float 0x3800000000000000) + ret float %res +} + +define float @test_subnorm_plus_subnorm_to_normal_rn_ftz_f() { +; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rn_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.add.rn.ftz.f(float 0x3800000000000000, float 0x3800000000000000) + ret float %res +} + +define float @test_subnorm_plus_subnorm_to_normal_rp_ftz_f() { +; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rp_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.add.rp.ftz.f(float 0x3800000000000000, float 0x3800000000000000) + ret float %res +} + +define float 
@test_subnorm_plus_subnorm_to_normal_rz_ftz_f() { +; CHECK-LABEL: define float @test_subnorm_plus_subnorm_to_normal_rz_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.add.rz.ftz.f(float 0x3800000000000000, float 0x3800000000000000) + ret float %res +} + +;############################################################### +;# Add(Normal, -Subnormal) -> Subnormal # +;############################################################### +; Tests addition of 2^-126 (the smallest normal number) and -(2^-127). +; - Without FTZ: The result is correctly computed as a subnormal (2^-127) +; - With FTZ: The subnormal input -(2^-127) is flushed to -0.0 before the +; addition, so the result is 2^-126; that result is normal and is +; therefore not flushed on output. + +define double @test_normal_minus_subnorm_to_subnorm_rm_d() { +; CHECK-LABEL: define double @test_normal_minus_subnorm_to_subnorm_rm_d() { +; CHECK-NEXT: ret double 0x3800000000000000 +; + %res = call double @llvm.nvvm.add.rm.d(double 0x3810000000000000, double 0xB800000000000000) + ret double %res +} + +define double @test_normal_minus_subnorm_to_subnorm_rn_d() { +; CHECK-LABEL: define double @test_normal_minus_subnorm_to_subnorm_rn_d() { +; CHECK-NEXT: ret double 0x3800000000000000 +; + %res = call double @llvm.nvvm.add.rn.d(double 0x3810000000000000, double 0xB800000000000000) + ret double %res +} + +define double @test_normal_minus_subnorm_to_subnorm_rp_d() { +; CHECK-LABEL: define double @test_normal_minus_subnorm_to_subnorm_rp_d() { +; CHECK-NEXT: ret double 0x3800000000000000 +; + %res = call double @llvm.nvvm.add.rp.d(double 0x3810000000000000, double 0xB800000000000000) + ret double %res +} + +define double @test_normal_minus_subnorm_to_subnorm_rz_d() { +; CHECK-LABEL: define double @test_normal_minus_subnorm_to_subnorm_rz_d() { +; CHECK-NEXT: ret double 0x3800000000000000 +; + %res = call double @llvm.nvvm.add.rz.d(double 0x3810000000000000, double 0xB800000000000000) + ret double %res +} + 
+define float @test_normal_minus_subnorm_to_subnorm_rm_f() { +; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rm_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.add.rm.f(float 0x3810000000000000, float 0xB800000000000000) + ret float %res +} + +define float @test_normal_minus_subnorm_to_subnorm_rn_f() { +; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rn_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.add.rn.f(float 0x3810000000000000, float 0xB800000000000000) + ret float %res +} + +define float @test_normal_minus_subnorm_to_subnorm_rp_f() { +; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rp_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.add.rp.f(float 0x3810000000000000, float 0xB800000000000000) + ret float %res +} + +define float @test_normal_minus_subnorm_to_subnorm_rz_f() { +; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rz_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.add.rz.f(float 0x3810000000000000, float 0xB800000000000000) + ret float %res +} + +define float @test_normal_minus_subnorm_to_subnorm_rm_ftz_f() { +; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rm_ftz_f() { +; CHECK-NEXT: ret float 0x3810000000000000 +; + %res = call float @llvm.nvvm.add.rm.ftz.f(float 0x3810000000000000, float 0xB800000000000000) + ret float %res +} + +define float @test_normal_minus_subnorm_to_subnorm_rn_ftz_f() { +; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rn_ftz_f() { +; CHECK-NEXT: ret float 0x3810000000000000 +; + %res = call float @llvm.nvvm.add.rn.ftz.f(float 0x3810000000000000, float 0xB800000000000000) + ret float %res +} + +define float @test_normal_minus_subnorm_to_subnorm_rp_ftz_f() { +; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rp_ftz_f() { +; CHECK-NEXT: ret float 0x3810000000000000 +; + 
%res = call float @llvm.nvvm.add.rp.ftz.f(float 0x3810000000000000, float 0xB800000000000000) + ret float %res +} + +define float @test_normal_minus_subnorm_to_subnorm_rz_ftz_f() { +; CHECK-LABEL: define float @test_normal_minus_subnorm_to_subnorm_rz_ftz_f() { +; CHECK-NEXT: ret float 0x3810000000000000 +; + %res = call float @llvm.nvvm.add.rz.ftz.f(float 0x3810000000000000, float 0xB800000000000000) + ret float %res +} + +;############################################################### +;# Add(1.0, 2^(-25)) # +;############################################################### +; Tests addition of 1.0 and 2^(-25) where the exact result falls between +; 1.0 and 1.0 + 2^(-23): +; - RN, RZ, RM: Return 1.0 (rounding toward nearest/zero/down) +; - RP: Returns 1.0 + 2^(-23) (rounding up) + +define float @test_1_plus_ulp_rm_f() { +; CHECK-LABEL: define float @test_1_plus_ulp_rm_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.add.rm.f(float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_1_plus_ulp_rn_f() { +; CHECK-LABEL: define float @test_1_plus_ulp_rn_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.add.rn.f(float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_1_plus_ulp_rp_f() { +; CHECK-LABEL: define float @test_1_plus_ulp_rp_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.add.rp.f(float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_1_plus_ulp_rz_f() { +; CHECK-LABEL: define float @test_1_plus_ulp_rz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.add.rz.f(float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_1_plus_ulp_rm_ftz_f() { +; CHECK-LABEL: define float @test_1_plus_ulp_rm_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.add.rm.ftz.f(float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float 
@test_1_plus_ulp_rn_ftz_f() { +; CHECK-LABEL: define float @test_1_plus_ulp_rn_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.add.rn.ftz.f(float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_1_plus_ulp_rp_ftz_f() { +; CHECK-LABEL: define float @test_1_plus_ulp_rp_ftz_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.add.rp.ftz.f(float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_1_plus_ulp_rz_ftz_f() { +; CHECK-LABEL: define float @test_1_plus_ulp_rz_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.add.rz.ftz.f(float 1.0, float 0x3E60000000000000) + ret float %res +} + +;############################################################### +;# Add(1.0, 2^(-54)) # +;############################################################### +; Tests addition of 1.0 and 2^(-54) where the exact result falls between +; 1.0 and 1.0 + 2^(-52): +; - RN, RZ, RM: Return 1.0 (rounding to nearest/zero/down) +; - RP: Returns 1.0 + 2^(-52) (rounding up) + +define double @test_1_plus_ulp_rm_d() { +; CHECK-LABEL: define double @test_1_plus_ulp_rm_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.add.rm.d(double 1.0, double 0x3C90000000000000) + ret double %res +} + +define double @test_1_plus_ulp_rn_d() { +; CHECK-LABEL: define double @test_1_plus_ulp_rn_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.add.rn.d(double 1.0, double 0x3C90000000000000) + ret double %res +} + +define double @test_1_plus_ulp_rp_d() { +; CHECK-LABEL: define double @test_1_plus_ulp_rp_d() { +; CHECK-NEXT: ret double 0x3FF0000000000001 +; + %res = call double @llvm.nvvm.add.rp.d(double 1.0, double 0x3C90000000000000) + ret double %res +} + +define double @test_1_plus_ulp_rz_d() { +; CHECK-LABEL: define double @test_1_plus_ulp_rz_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double 
@llvm.nvvm.add.rz.d(double 1.0, double 0x3C90000000000000) + ret double %res +} + +;############################################################### +;# Add(-1.0, 2^(-25)) # +;############################################################### +; Tests addition of -1.0 and 2^(-25) where the exact result falls between +; -1.0 and -1.0 + 2^(-24): +; - RN, RM: Return -1.0 (rounding toward nearest/down) +; - RZ, RP: Return -1.0 + 2^(-24) (rounding toward zero/up) + +define float @test_neg_1_plus_ulp_rm_f() { +; CHECK-LABEL: define float @test_neg_1_plus_ulp_rm_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.add.rm.f(float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_neg_1_plus_ulp_rn_f() { +; CHECK-LABEL: define float @test_neg_1_plus_ulp_rn_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.add.rn.f(float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_neg_1_plus_ulp_rp_f() { +; CHECK-LABEL: define float @test_neg_1_plus_ulp_rp_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFE0000000 +; + %res = call float @llvm.nvvm.add.rp.f(float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_neg_1_plus_ulp_rz_f() { +; CHECK-LABEL: define float @test_neg_1_plus_ulp_rz_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFE0000000 +; + %res = call float @llvm.nvvm.add.rz.f(float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_neg_1_plus_ulp_rm_ftz_f() { +; CHECK-LABEL: define float @test_neg_1_plus_ulp_rm_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.add.rm.ftz.f(float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_neg_1_plus_ulp_rn_ftz_f() { +; CHECK-LABEL: define float @test_neg_1_plus_ulp_rn_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.add.rn.ftz.f(float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float 
@test_neg_1_plus_ulp_rp_ftz_f() { +; CHECK-LABEL: define float @test_neg_1_plus_ulp_rp_ftz_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFE0000000 +; + %res = call float @llvm.nvvm.add.rp.ftz.f(float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_neg_1_plus_ulp_rz_ftz_f() { +; CHECK-LABEL: define float @test_neg_1_plus_ulp_rz_ftz_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFE0000000 +; + %res = call float @llvm.nvvm.add.rz.ftz.f(float -1.0, float 0x3E60000000000000) + ret float %res +} + +;############################################################### +;# Add(-1.0, 2^(-54)) # +;############################################################### +; Tests addition of -1.0 and 2^(-54) where the exact result falls between +; -1.0 and -1.0 + 2^(-53): +; - RN, RM: Return -1.0 (rounding toward nearest/down) +; - RZ, RP: Return -1.0 + 2^(-53) (rounding toward zero/up) + +define double @test_neg_1_plus_ulp_rm_d() { +; CHECK-LABEL: define double @test_neg_1_plus_ulp_rm_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.add.rm.d(double -1.0, double 0x3C90000000000000) + ret double %res +} + +define double @test_neg_1_plus_ulp_rn_d() { +; CHECK-LABEL: define double @test_neg_1_plus_ulp_rn_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.add.rn.d(double -1.0, double 0x3C90000000000000) + ret double %res +} + +define double @test_neg_1_plus_ulp_rp_d() { +; CHECK-LABEL: define double @test_neg_1_plus_ulp_rp_d() { +; CHECK-NEXT: ret double 0xBFEFFFFFFFFFFFFF +; + %res = call double @llvm.nvvm.add.rp.d(double -1.0, double 0x3C90000000000000) + ret double %res +} + +define double @test_neg_1_plus_ulp_rz_d() { +; CHECK-LABEL: define double @test_neg_1_plus_ulp_rz_d() { +; CHECK-NEXT: ret double 0xBFEFFFFFFFFFFFFF +; + %res = call double @llvm.nvvm.add.rz.d(double -1.0, double 0x3C90000000000000) + ret double %res +} + +;############################################################### +;# Add(1.0, -2^(-25)) # 
+;############################################################### +; Tests addition of 1.0 and -2^(-25) where the exact result falls between +; 1.0 and 1.0 - 2^(-24): +; - RN, RP: Return 1.0 (rounding toward nearest/up) +; - RZ, RM: Return 1.0 - 2^(-24) (rounding toward zero/down) + +define float @test_1_minus_ulp_rm_f() { +; CHECK-LABEL: define float @test_1_minus_ulp_rm_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFE0000000 +; + %res = call float @llvm.nvvm.add.rm.f(float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_1_minus_ulp_rn_f() { +; CHECK-LABEL: define float @test_1_minus_ulp_rn_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.add.rn.f(float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_1_minus_ulp_rp_f() { +; CHECK-LABEL: define float @test_1_minus_ulp_rp_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.add.rp.f(float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_1_minus_ulp_rz_f() { +; CHECK-LABEL: define float @test_1_minus_ulp_rz_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFE0000000 +; + %res = call float @llvm.nvvm.add.rz.f(float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_1_minus_ulp_rm_ftz_f() { +; CHECK-LABEL: define float @test_1_minus_ulp_rm_ftz_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFE0000000 +; + %res = call float @llvm.nvvm.add.rm.ftz.f(float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_1_minus_ulp_rn_ftz_f() { +; CHECK-LABEL: define float @test_1_minus_ulp_rn_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.add.rn.ftz.f(float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_1_minus_ulp_rp_ftz_f() { +; CHECK-LABEL: define float @test_1_minus_ulp_rp_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.add.rp.ftz.f(float 1.0, float 0xBE60000000000000) + ret float %res +} + 
+define float @test_1_minus_ulp_rz_ftz_f() { +; CHECK-LABEL: define float @test_1_minus_ulp_rz_ftz_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFE0000000 +; + %res = call float @llvm.nvvm.add.rz.ftz.f(float 1.0, float 0xBE60000000000000) + ret float %res +} + +;############################################################### +;# Add(1.0, -2^(-54)) # +;############################################################### +; Tests addition of 1.0 and -2^(-54) where the exact result falls between +; 1.0 and 1.0 - 2^(-53): +; - RN, RP: Return 1.0 (rounding toward nearest/up) +; - RZ, RM: Return 1.0 - 2^(-53) (rounding toward zero/down) + +define double @test_1_minus_ulp_rm_d() { +; CHECK-LABEL: define double @test_1_minus_ulp_rm_d() { +; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFF +; + %res = call double @llvm.nvvm.add.rm.d(double 1.0, double 0xBC90000000000000) + ret double %res +} + +define double @test_1_minus_ulp_rn_d() { +; CHECK-LABEL: define double @test_1_minus_ulp_rn_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.add.rn.d(double 1.0, double 0xBC90000000000000) + ret double %res +} + +define double @test_1_minus_ulp_rp_d() { +; CHECK-LABEL: define double @test_1_minus_ulp_rp_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.add.rp.d(double 1.0, double 0xBC90000000000000) + ret double %res +} + +define double @test_1_minus_ulp_rz_d() { +; CHECK-LABEL: define double @test_1_minus_ulp_rz_d() { +; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFF +; + %res = call double @llvm.nvvm.add.rz.d(double 1.0, double 0xBC90000000000000) + ret double %res +} + +;############################################################### +;# Add(-1.0, -2^(-25)) # +;############################################################### +; Tests addition of -1.0 and -2^(-25) where the exact result falls between +; -1.0 and -1.0 - 2^(-23): +; - RN, RZ, RP: Return -1.0 (rounding to nearest/zero/up) +; - RM: Return -1.0 - 2^(-23) (rounding down) + +define 
float @test_neg_1_minus_ulp_rm_f() { +; CHECK-LABEL: define float @test_neg_1_minus_ulp_rm_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.add.rm.f(float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_neg_1_minus_ulp_rn_f() { +; CHECK-LABEL: define float @test_neg_1_minus_ulp_rn_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.add.rn.f(float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_neg_1_minus_ulp_rp_f() { +; CHECK-LABEL: define float @test_neg_1_minus_ulp_rp_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.add.rp.f(float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_neg_1_minus_ulp_rz_f() { +; CHECK-LABEL: define float @test_neg_1_minus_ulp_rz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.add.rz.f(float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_neg_1_minus_ulp_rm_ftz_f() { +; CHECK-LABEL: define float @test_neg_1_minus_ulp_rm_ftz_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.add.rm.ftz.f(float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_neg_1_minus_ulp_rn_ftz_f() { +; CHECK-LABEL: define float @test_neg_1_minus_ulp_rn_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.add.rn.ftz.f(float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_neg_1_minus_ulp_rp_ftz_f() { +; CHECK-LABEL: define float @test_neg_1_minus_ulp_rp_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.add.rp.ftz.f(float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_neg_1_minus_ulp_rz_ftz_f() { +; CHECK-LABEL: define float @test_neg_1_minus_ulp_rz_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.add.rz.ftz.f(float -1.0, float 
0xBE60000000000000) + ret float %res +} + +;############################################################### +;# Add(-1.0, -2^(-54)) # +;############################################################### +; Tests addition of -1.0 and -2^(-54) where the exact result falls between +; -1.0 and -1.0 - 2^(-52): +; - RN, RZ, RP: Return -1.0 (rounding to nearest/zero/up) +; - RM: Return -1.0 - 2^(-52) (rounding down) + +define double @test_neg_1_minus_ulp_rm_d() { +; CHECK-LABEL: define double @test_neg_1_minus_ulp_rm_d() { +; CHECK-NEXT: ret double 0xBFF0000000000001 +; + %res = call double @llvm.nvvm.add.rm.d(double -1.0, double 0xBC90000000000000) + ret double %res +} + +define double @test_neg_1_minus_ulp_rn_d() { +; CHECK-LABEL: define double @test_neg_1_minus_ulp_rn_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.add.rn.d(double -1.0, double 0xBC90000000000000) + ret double %res +} + +define double @test_neg_1_minus_ulp_rp_d() { +; CHECK-LABEL: define double @test_neg_1_minus_ulp_rp_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.add.rp.d(double -1.0, double 0xBC90000000000000) + ret double %res +} + +define double @test_neg_1_minus_ulp_rz_d() { +; CHECK-LABEL: define double @test_neg_1_minus_ulp_rz_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.add.rz.d(double -1.0, double 0xBC90000000000000) + ret double %res +} diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-div.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-div.ll new file mode 100644 index 0000000000000..fab674cd9069b --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-div.ll @@ -0,0 +1,880 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s + +; Check constant-folding for NVVM divide intrinsics with different rounding modes + 
+;############################################################### +;# Div(1.25, 2.0) # +;############################################################### +; Tests division of two normal numbers (1.25 by 2.0) where the result +; is exactly representable. All rounding modes should produce the same result. + +define double @test_1_25_div_2_rm_d() { +; CHECK-LABEL: define double @test_1_25_div_2_rm_d() { +; CHECK-NEXT: ret double 6.250000e-01 +; + %res = call double @llvm.nvvm.div.rm.d(double 1.25, double 2.0) + ret double %res +} + +define double @test_1_25_div_2_rn_d() { +; CHECK-LABEL: define double @test_1_25_div_2_rn_d() { +; CHECK-NEXT: ret double 6.250000e-01 +; + %res = call double @llvm.nvvm.div.rn.d(double 1.25, double 2.0) + ret double %res +} + +define double @test_1_25_div_2_rp_d() { +; CHECK-LABEL: define double @test_1_25_div_2_rp_d() { +; CHECK-NEXT: ret double 6.250000e-01 +; + %res = call double @llvm.nvvm.div.rp.d(double 1.25, double 2.0) + ret double %res +} + +define double @test_1_25_div_2_rz_d() { +; CHECK-LABEL: define double @test_1_25_div_2_rz_d() { +; CHECK-NEXT: ret double 6.250000e-01 +; + %res = call double @llvm.nvvm.div.rz.d(double 1.25, double 2.0) + ret double %res +} + +define float @test_1_25_div_2_rm_f() { +; CHECK-LABEL: define float @test_1_25_div_2_rm_f() { +; CHECK-NEXT: ret float 6.250000e-01 +; + %res = call float @llvm.nvvm.div.rm.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_div_2_rn_f() { +; CHECK-LABEL: define float @test_1_25_div_2_rn_f() { +; CHECK-NEXT: ret float 6.250000e-01 +; + %res = call float @llvm.nvvm.div.rn.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_div_2_rp_f() { +; CHECK-LABEL: define float @test_1_25_div_2_rp_f() { +; CHECK-NEXT: ret float 6.250000e-01 +; + %res = call float @llvm.nvvm.div.rp.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_div_2_rz_f() { +; CHECK-LABEL: define float @test_1_25_div_2_rz_f() { +; CHECK-NEXT: ret 
float 6.250000e-01 +; + %res = call float @llvm.nvvm.div.rz.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_div_2_rm_ftz_f() { +; CHECK-LABEL: define float @test_1_25_div_2_rm_ftz_f() { +; CHECK-NEXT: ret float 6.250000e-01 +; + %res = call float @llvm.nvvm.div.rm.ftz.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_div_2_rn_ftz_f() { +; CHECK-LABEL: define float @test_1_25_div_2_rn_ftz_f() { +; CHECK-NEXT: ret float 6.250000e-01 +; + %res = call float @llvm.nvvm.div.rn.ftz.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_div_2_rp_ftz_f() { +; CHECK-LABEL: define float @test_1_25_div_2_rp_ftz_f() { +; CHECK-NEXT: ret float 6.250000e-01 +; + %res = call float @llvm.nvvm.div.rp.ftz.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_div_2_rz_ftz_f() { +; CHECK-LABEL: define float @test_1_25_div_2_rz_ftz_f() { +; CHECK-NEXT: ret float 6.250000e-01 +; + %res = call float @llvm.nvvm.div.rz.ftz.f(float 1.25, float 2.0) + ret float %res +} + +;############################################################### +;# Div(Subnormal, 1.0) # +;############################################################### +; Tests division of a subnormal number by 1.0 to verify FTZ behavior. +; For float, we use 2^-149 (smallest subnormal). +; For double, we use 2^-1074 (smallest subnormal). +; Without FTZ, the result should be the subnormal number. +; With FTZ, the result should be 0.0. 
+ +define double @test_subnorm_div_1_rm_d() { +; CHECK-LABEL: define double @test_subnorm_div_1_rm_d() { +; CHECK-NEXT: ret double 4.940660e-324 +; + %res = call double @llvm.nvvm.div.rm.d(double 0x0000000000000001, double 1.0) + ret double %res +} + +define double @test_subnorm_div_1_rn_d() { +; CHECK-LABEL: define double @test_subnorm_div_1_rn_d() { +; CHECK-NEXT: ret double 4.940660e-324 +; + %res = call double @llvm.nvvm.div.rn.d(double 0x0000000000000001, double 1.0) + ret double %res +} + +define double @test_subnorm_div_1_rp_d() { +; CHECK-LABEL: define double @test_subnorm_div_1_rp_d() { +; CHECK-NEXT: ret double 4.940660e-324 +; + %res = call double @llvm.nvvm.div.rp.d(double 0x0000000000000001, double 1.0) + ret double %res +} + +define double @test_subnorm_div_1_rz_d() { +; CHECK-LABEL: define double @test_subnorm_div_1_rz_d() { +; CHECK-NEXT: ret double 4.940660e-324 +; + %res = call double @llvm.nvvm.div.rz.d(double 0x0000000000000001, double 1.0) + ret double %res +} + +define float @test_subnorm_div_1_rm_f() { +; CHECK-LABEL: define float @test_subnorm_div_1_rm_f() { +; CHECK-NEXT: ret float 0x36A0000000000000 +; + %res = call float @llvm.nvvm.div.rm.f(float 0x36A0000000000000, float 1.0) + ret float %res +} + +define float @test_subnorm_div_1_rn_f() { +; CHECK-LABEL: define float @test_subnorm_div_1_rn_f() { +; CHECK-NEXT: ret float 0x36A0000000000000 +; + %res = call float @llvm.nvvm.div.rn.f(float 0x36A0000000000000, float 1.0) + ret float %res +} + +define float @test_subnorm_div_1_rp_f() { +; CHECK-LABEL: define float @test_subnorm_div_1_rp_f() { +; CHECK-NEXT: ret float 0x36A0000000000000 +; + %res = call float @llvm.nvvm.div.rp.f(float 0x36A0000000000000, float 1.0) + ret float %res +} + +define float @test_subnorm_div_1_rz_f() { +; CHECK-LABEL: define float @test_subnorm_div_1_rz_f() { +; CHECK-NEXT: ret float 0x36A0000000000000 +; + %res = call float @llvm.nvvm.div.rz.f(float 0x36A0000000000000, float 1.0) + ret float %res +} + +define float 
@test_subnorm_div_1_rm_ftz_f() { +; CHECK-LABEL: define float @test_subnorm_div_1_rm_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.div.rm.ftz.f(float 0x36A0000000000000, float 1.0) + ret float %res +} + +define float @test_subnorm_div_1_rn_ftz_f() { +; CHECK-LABEL: define float @test_subnorm_div_1_rn_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.div.rn.ftz.f(float 0x36A0000000000000, float 1.0) + ret float %res +} + +define float @test_subnorm_div_1_rp_ftz_f() { +; CHECK-LABEL: define float @test_subnorm_div_1_rp_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.div.rp.ftz.f(float 0x36A0000000000000, float 1.0) + ret float %res +} + +define float @test_subnorm_div_1_rz_ftz_f() { +; CHECK-LABEL: define float @test_subnorm_div_1_rz_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.div.rz.ftz.f(float 0x36A0000000000000, float 1.0) + ret float %res +} + +;############################################################### +;# Div(Normal, Normal) -> Subnormal # +;############################################################### +; Tests division of two normal numbers that produces a subnormal result. +; We divide the smallest normal float (2^-126 or 2^-1022 for doubles) by 2 to get 2^-127 (or 2^-1023), +; which is subnormal. This tests the transition from normal to subnormal numbers. 
+ +define double @test_normal_div_normal_to_subnorm_rm_d() { +; CHECK-LABEL: define double @test_normal_div_normal_to_subnorm_rm_d() { +; CHECK-NEXT: ret double 0x8000000000000 +; + %res = call double @llvm.nvvm.div.rm.d(double 0x10000000000000, double 2.0) + ret double %res +} + +define double @test_normal_div_normal_to_subnorm_rn_d() { +; CHECK-LABEL: define double @test_normal_div_normal_to_subnorm_rn_d() { +; CHECK-NEXT: ret double 0x8000000000000 +; + %res = call double @llvm.nvvm.div.rn.d(double 0x10000000000000, double 2.0) + ret double %res +} + +define double @test_normal_div_normal_to_subnorm_rp_d() { +; CHECK-LABEL: define double @test_normal_div_normal_to_subnorm_rp_d() { +; CHECK-NEXT: ret double 0x8000000000000 +; + %res = call double @llvm.nvvm.div.rp.d(double 0x10000000000000, double 2.0) + ret double %res +} + +define double @test_normal_div_normal_to_subnorm_rz_d() { +; CHECK-LABEL: define double @test_normal_div_normal_to_subnorm_rz_d() { +; CHECK-NEXT: ret double 0x8000000000000 +; + %res = call double @llvm.nvvm.div.rz.d(double 0x10000000000000, double 2.0) + ret double %res +} + +define float @test_normal_div_normal_to_subnorm_rm_f() { +; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rm_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.div.rm.f(float 0x3810000000000000, float 2.0) + ret float %res +} + +define float @test_normal_div_normal_to_subnorm_rn_f() { +; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rn_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.div.rn.f(float 0x3810000000000000, float 2.0) + ret float %res +} + +define float @test_normal_div_normal_to_subnorm_rp_f() { +; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rp_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.div.rp.f(float 0x3810000000000000, float 2.0) + ret float %res +} + +define float 
@test_normal_div_normal_to_subnorm_rz_f() { +; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rz_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.div.rz.f(float 0x3810000000000000, float 2.0) + ret float %res +} + +define float @test_normal_div_normal_to_subnorm_rm_ftz_f() { +; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rm_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.div.rm.ftz.f(float 0x3810000000000000, float 2.0) + ret float %res +} + +define float @test_normal_div_normal_to_subnorm_rn_ftz_f() { +; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rn_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.div.rn.ftz.f(float 0x3810000000000000, float 2.0) + ret float %res +} + +define float @test_normal_div_normal_to_subnorm_rp_ftz_f() { +; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rp_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.div.rp.ftz.f(float 0x3810000000000000, float 2.0) + ret float %res +} + +define float @test_normal_div_normal_to_subnorm_rz_ftz_f() { +; CHECK-LABEL: define float @test_normal_div_normal_to_subnorm_rz_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.div.rz.ftz.f(float 0x3810000000000000, float 2.0) + ret float %res +} + +;############################################################### +;# Div( 4/3 + epsilon , 4/3 - epsilon) # +;############################################################### +; Tests division of numbers just above and just below 4/3. 
+; The result falls between 1.0 and 1.0 + 2^-23 +; - RZ, RM round to 1.0 (rounding towards zero/down) +; - RN, RP rounds to 1.0 + 2^-23 (rounding towards nearest/up) + +define float @test_div_just_above_1_rm_f() { +; CHECK-LABEL: define float @test_div_just_above_1_rm_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.div.rm.f(float 0x3FF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_above_1_rn_f() { +; CHECK-LABEL: define float @test_div_just_above_1_rn_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.div.rn.f(float 0x3FF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_above_1_rp_f() { +; CHECK-LABEL: define float @test_div_just_above_1_rp_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.div.rp.f(float 0x3FF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_above_1_rz_f() { +; CHECK-LABEL: define float @test_div_just_above_1_rz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.div.rz.f(float 0x3FF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_above_1_rm_ftz_f() { +; CHECK-LABEL: define float @test_div_just_above_1_rm_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.div.rm.ftz.f(float 0x3FF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_above_1_rn_ftz_f() { +; CHECK-LABEL: define float @test_div_just_above_1_rn_ftz_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.div.rn.ftz.f(float 0x3FF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_above_1_rp_ftz_f() { +; CHECK-LABEL: define float @test_div_just_above_1_rp_ftz_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.div.rp.ftz.f(float 
0x3FF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_above_1_rz_ftz_f() { +; CHECK-LABEL: define float @test_div_just_above_1_rz_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.div.rz.ftz.f(float 0x3FF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +;############################################################### +;# Div( 4/3 + epsilon , 4/3 - epsilon) # +;############################################################### +; Tests division of numbers just above and just below 4/3. +; The result falls between 1.0 and 1.0 + 2^-52 +; - RZ, RM round to 1.0 (rounding towards zero/down) +; - RN, RP round to 1.0 + 2^-52 (rounding towards nearest/up) + +define double @test_div_just_above_1_rm_d() { +; CHECK-LABEL: define double @test_div_just_above_1_rm_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.div.rm.d(double 0x3FF5555555555555, double 0x3FF5555555555554) + ret double %res +} + +define double @test_div_just_above_1_rn_d() { +; CHECK-LABEL: define double @test_div_just_above_1_rn_d() { +; CHECK-NEXT: ret double 0x3FF0000000000001 +; + %res = call double @llvm.nvvm.div.rn.d(double 0x3FF5555555555555, double 0x3FF5555555555554) + ret double %res +} + +define double @test_div_just_above_1_rp_d() { +; CHECK-LABEL: define double @test_div_just_above_1_rp_d() { +; CHECK-NEXT: ret double 0x3FF0000000000001 +; + %res = call double @llvm.nvvm.div.rp.d(double 0x3FF5555555555555, double 0x3FF5555555555554) + ret double %res +} + +define double @test_div_just_above_1_rz_d() { +; CHECK-LABEL: define double @test_div_just_above_1_rz_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.div.rz.d(double 0x3FF5555555555555, double 0x3FF5555555555554) + ret double %res +} + +;############################################################### +;# Div( -(4/3 + epsilon), 4/3 - epsilon ) # 
+;############################################################### +; Tests division of numbers just below -4/3 and just below 4/3. +; The result falls between -1.0 and -1.0 - 2^-23 +; - RZ, RP round to -1.0 (rounding towards zero/up) +; - RN, RM round to -1.0 - 2^-23 (rounding towards nearest/down) + +define float @test_div_just_below_negative_1_rm_f() { +; CHECK-LABEL: define float @test_div_just_below_negative_1_rm_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.div.rm.f(float 0xBFF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_below_negative_1_rn_f() { +; CHECK-LABEL: define float @test_div_just_below_negative_1_rn_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.div.rn.f(float 0xBFF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_below_negative_1_rp_f() { +; CHECK-LABEL: define float @test_div_just_below_negative_1_rp_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.div.rp.f(float 0xBFF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_below_negative_1_rz_f() { +; CHECK-LABEL: define float @test_div_just_below_negative_1_rz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.div.rz.f(float 0xBFF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_below_negative_1_rm_ftz_f() { +; CHECK-LABEL: define float @test_div_just_below_negative_1_rm_ftz_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.div.rm.ftz.f(float 0xBFF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_below_negative_1_rn_ftz_f() { +; CHECK-LABEL: define float @test_div_just_below_negative_1_rn_ftz_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.div.rn.ftz.f(float 0xBFF5555560000000, float 
0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_below_negative_1_rp_ftz_f() { +; CHECK-LABEL: define float @test_div_just_below_negative_1_rp_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.div.rp.ftz.f(float 0xBFF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +define float @test_div_just_below_negative_1_rz_ftz_f() { +; CHECK-LABEL: define float @test_div_just_below_negative_1_rz_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.div.rz.ftz.f(float 0xBFF5555560000000, float 0x3FF5555540000000) + ret float %res +} + +;############################################################### +;# Div( -(4/3 + epsilon), 4/3 - epsilon ) # +;############################################################### +; Tests division of numbers just below -4/3 and just below 4/3. +; The result falls between -1.0 and -1.0 - 2^-52 +; - RZ, RP round to -1.0 (rounding towards zero/up) +; - RN, RM rounds to -1.0 - 2^-52 (rounding towards nearest/down) + +define double @test_div_just_below_negative_1_rm_d() { +; CHECK-LABEL: define double @test_div_just_below_negative_1_rm_d() { +; CHECK-NEXT: ret double 0xBFF0000000000001 +; + %res = call double @llvm.nvvm.div.rm.d(double 0xBFF5555555555555, double 0x3FF5555555555554) + ret double %res +} + +define double @test_div_just_below_negative_1_rn_d() { +; CHECK-LABEL: define double @test_div_just_below_negative_1_rn_d() { +; CHECK-NEXT: ret double 0xBFF0000000000001 +; + %res = call double @llvm.nvvm.div.rn.d(double 0xBFF5555555555555, double 0x3FF5555555555554) + ret double %res +} + +define double @test_div_just_below_negative_1_rp_d() { +; CHECK-LABEL: define double @test_div_just_below_negative_1_rp_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.div.rp.d(double 0xBFF5555555555555, double 0x3FF5555555555554) + ret double %res +} + +define double @test_div_just_below_negative_1_rz_d() { +; CHECK-LABEL: define 
double @test_div_just_below_negative_1_rz_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.div.rz.d(double 0xBFF5555555555555, double 0x3FF5555555555554) + ret double %res +} + +;############################################################### +;# Div(~4/3 , ~4/3 + epsilon) # +;############################################################### +; Tests division of ~4/3 by a value just over 4/3 +; The exact result falls between 1.0 - 2^-23 ( = 0x3FEFFFFFC0000000) +; and 1.0 - (2^-23 + 2^-24) ( = 0x3FEFFFFFA0000000). +; - RN, RZ, RM round to 1.0 - 2^-23 - 2^-24 (rounding towards nearest/zero/down) +; - RP rounds to 1.0 - 2^-23 (rounding up) + +define float @test_div_just_below_1_rm_f() { +; CHECK-LABEL: define float @test_div_just_below_1_rm_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFA0000000 +; + %res = call float @llvm.nvvm.div.rm.f(float 0x3FF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_below_1_rn_f() { +; CHECK-LABEL: define float @test_div_just_below_1_rn_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFA0000000 +; + %res = call float @llvm.nvvm.div.rn.f(float 0x3FF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_below_1_rp_f() { +; CHECK-LABEL: define float @test_div_just_below_1_rp_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFC0000000 +; + %res = call float @llvm.nvvm.div.rp.f(float 0x3FF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_below_1_rz_f() { +; CHECK-LABEL: define float @test_div_just_below_1_rz_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFA0000000 +; + %res = call float @llvm.nvvm.div.rz.f(float 0x3FF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_below_1_rm_ftz_f() { +; CHECK-LABEL: define float @test_div_just_below_1_rm_ftz_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFA0000000 +; + %res = call float @llvm.nvvm.div.rm.ftz.f(float 0x3FF5555540000000, float
0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_below_1_rn_ftz_f() { +; CHECK-LABEL: define float @test_div_just_below_1_rn_ftz_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFA0000000 +; + %res = call float @llvm.nvvm.div.rn.ftz.f(float 0x3FF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_below_1_rp_ftz_f() { +; CHECK-LABEL: define float @test_div_just_below_1_rp_ftz_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFC0000000 +; + %res = call float @llvm.nvvm.div.rp.ftz.f(float 0x3FF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_below_1_rz_ftz_f() { +; CHECK-LABEL: define float @test_div_just_below_1_rz_ftz_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFA0000000 +; + %res = call float @llvm.nvvm.div.rz.ftz.f(float 0x3FF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +;############################################################### +;# Div(~4/3 , ~4/3 + epsilon) # +;############################################################### +; Tests division of ~4/3 by a value just over 4/3 +; The exact result falls between 1.0 - 2^-51 - 2^-52 ( = 0x3FEFFFFFFFFFFFFA) +; and 1.0 - 2^-51 - 2^-53 ( = 0x3FEFFFFFFFFFFFFB). 
+; - RN, RZ, RM round to 1.0 - 2^-51 - 2^-52 (rounding towards nearest/zero/down) +; - RP rounds to 1.0 - 2^-51 - 2^-53 (rounding up) + +define double @test_div_just_below_1_rm_d() { +; CHECK-LABEL: define double @test_div_just_below_1_rm_d() { +; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFA +; + %res = call double @llvm.nvvm.div.rm.d(double 0x3FF5555555555554, double 0x3FF5555555555558) + ret double %res +} + +define double @test_div_just_below_1_rn_d() { +; CHECK-LABEL: define double @test_div_just_below_1_rn_d() { +; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFA +; + %res = call double @llvm.nvvm.div.rn.d(double 0x3FF5555555555554, double 0x3FF5555555555558) + ret double %res +} + +define double @test_div_just_below_1_rp_d() { +; CHECK-LABEL: define double @test_div_just_below_1_rp_d() { +; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFB +; + %res = call double @llvm.nvvm.div.rp.d(double 0x3FF5555555555554, double 0x3FF5555555555558) + ret double %res +} + +define double @test_div_just_below_1_rz_d() { +; CHECK-LABEL: define double @test_div_just_below_1_rz_d() { +; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFA +; + %res = call double @llvm.nvvm.div.rz.d(double 0x3FF5555555555554, double 0x3FF5555555555558) + ret double %res +} + +;############################################################### +;# Div(-4/3, ~4/3 + epsilon) # +;############################################################### +; Tests division of ~-4/3 by a value just over 4/3 +; The exact result falls between -1.0 + 2^-23 ( = 0xBFEFFFFFC0000000) +; and -1.0 + (2^-23 + 2^-24) ( = 0xBFEFFFFFA0000000).
+; - RN, RZ, RP round to -1.0 + 2^-23 + 2^-24 (rounding towards nearest/zero/up) +; - RM rounds to -1.0 + 2^-23 (rounding down) + +define float @test_div_just_above_negative_1_rm_f() { +; CHECK-LABEL: define float @test_div_just_above_negative_1_rm_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFC0000000 +; + %res = call float @llvm.nvvm.div.rm.f(float 0xBFF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_above_negative_1_rn_f() { +; CHECK-LABEL: define float @test_div_just_above_negative_1_rn_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFA0000000 +; + %res = call float @llvm.nvvm.div.rn.f(float 0xBFF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_above_negative_1_rp_f() { +; CHECK-LABEL: define float @test_div_just_above_negative_1_rp_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFA0000000 +; + %res = call float @llvm.nvvm.div.rp.f(float 0xBFF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_above_negative_1_rz_f() { +; CHECK-LABEL: define float @test_div_just_above_negative_1_rz_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFA0000000 +; + %res = call float @llvm.nvvm.div.rz.f(float 0xBFF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_above_negative_1_rm_ftz_f() { +; CHECK-LABEL: define float @test_div_just_above_negative_1_rm_ftz_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFC0000000 +; + %res = call float @llvm.nvvm.div.rm.ftz.f(float 0xBFF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_above_negative_1_rn_ftz_f() { +; CHECK-LABEL: define float @test_div_just_above_negative_1_rn_ftz_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFA0000000 +; + %res = call float @llvm.nvvm.div.rn.ftz.f(float 0xBFF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_above_negative_1_rp_ftz_f() { +; CHECK-LABEL: define float
@test_div_just_above_negative_1_rp_ftz_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFA0000000 +; + %res = call float @llvm.nvvm.div.rp.ftz.f(float 0xBFF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +define float @test_div_just_above_negative_1_rz_ftz_f() { +; CHECK-LABEL: define float @test_div_just_above_negative_1_rz_ftz_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFA0000000 +; + %res = call float @llvm.nvvm.div.rz.ftz.f(float 0xBFF5555540000000, float 0x3FF5555580000000) + ret float %res +} + +;############################################################### +;# Div(-4/3, ~4/3 + epsilon) # +;############################################################### +; Tests division of ~-4/3 by a value just over 4/3 +; The exact result falls between -1.0 + 2^-51 + 2^-52 ( = 0xBFEFFFFFFFFFFFFA) +; and -1.0 + 2^-51 + 2^-53 ( = 0xBFEFFFFFFFFFFFFB). +; - RN, RZ, RP round to -1.0 + 2^-51 + 2^-52 (rounding towards nearest/zero/up) +; - RM rounds to -1.0 + 2^-51 + 2^-53 (rounding down) + +define double @test_div_just_above_negative_1_rm_d() { +; CHECK-LABEL: define double @test_div_just_above_negative_1_rm_d() { +; CHECK-NEXT: ret double 0xBFEFFFFFFFFFFFFB +; + %res = call double @llvm.nvvm.div.rm.d(double 0xBFF5555555555554, double 0x3FF5555555555558) + ret double %res +} + +define double @test_div_just_above_negative_1_rn_d() { +; CHECK-LABEL: define double @test_div_just_above_negative_1_rn_d() { +; CHECK-NEXT: ret double 0xBFEFFFFFFFFFFFFA +; + %res = call double @llvm.nvvm.div.rn.d(double 0xBFF5555555555554, double 0x3FF5555555555558) + ret double %res +} + +define double @test_div_just_above_negative_1_rp_d() { +; CHECK-LABEL: define double @test_div_just_above_negative_1_rp_d() { +; CHECK-NEXT: ret double 0xBFEFFFFFFFFFFFFA +; + %res = call double @llvm.nvvm.div.rp.d(double 0xBFF5555555555554, double 0x3FF5555555555558) + ret double %res +} + +define double @test_div_just_above_negative_1_rz_d() { +; CHECK-LABEL: define double @test_div_just_above_negative_1_rz_d() {
+; CHECK-NEXT: ret double 0xBFEFFFFFFFFFFFFA +; + %res = call double @llvm.nvvm.div.rz.d(double 0xBFF5555555555554, double 0x3FF5555555555558) + ret double %res +} + +;############################################################### +;# Div(NaN, NaN) # +;############################################################### +; Tests division of NaN by NaN to verify that constant folding is not performed +; when the result would be NaN. + +define float @test_nan_div_nan_rm_f() { +; CHECK-LABEL: define float @test_nan_div_nan_rm_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.div.rm.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.div.rm.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) + ret float %res +} + +define float @test_nan_div_nan_rn_f() { +; CHECK-LABEL: define float @test_nan_div_nan_rn_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.div.rn.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.div.rn.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) + ret float %res +} + +define float @test_nan_div_nan_rp_f() { +; CHECK-LABEL: define float @test_nan_div_nan_rp_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.div.rp.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.div.rp.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) + ret float %res +} + +define float @test_nan_div_nan_rz_f() { +; CHECK-LABEL: define float @test_nan_div_nan_rz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.div.rz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.div.rz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) + ret float %res +} + +define float @test_nan_div_nan_rm_ftz_f() { +; CHECK-LABEL: define float @test_nan_div_nan_rm_ftz_f() { +; CHECK-NEXT: 
[[RES:%.*]] = call float @llvm.nvvm.div.rm.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.div.rm.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) + ret float %res +} + +define float @test_nan_div_nan_rn_ftz_f() { +; CHECK-LABEL: define float @test_nan_div_nan_rn_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.div.rn.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.div.rn.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) + ret float %res +} + +define float @test_nan_div_nan_rp_ftz_f() { +; CHECK-LABEL: define float @test_nan_div_nan_rp_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.div.rp.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.div.rp.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) + ret float %res +} + +define float @test_nan_div_nan_rz_ftz_f() { +; CHECK-LABEL: define float @test_nan_div_nan_rz_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.div.rz.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.div.rz.ftz.f(float 0x7FFFFFFF00000000, float 0x7FFFFFFF00000000) + ret float %res +} + +define double @test_nan_div_nan_rm_d() { +; CHECK-LABEL: define double @test_nan_div_nan_rm_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.div.rm.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.div.rm.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF) + ret double %res +} + +define double @test_nan_div_nan_rn_d() { +; CHECK-LABEL: define double @test_nan_div_nan_rn_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.div.rn.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF) +; CHECK-NEXT: ret double [[RES]] +; 
+ %res = call double @llvm.nvvm.div.rn.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF) + ret double %res +} + +define double @test_nan_div_nan_rp_d() { +; CHECK-LABEL: define double @test_nan_div_nan_rp_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.div.rp.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.div.rp.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF) + ret double %res +} + +define double @test_nan_div_nan_rz_d() { +; CHECK-LABEL: define double @test_nan_div_nan_rz_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.div.rz.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.div.rz.d(double 0x7FFFFFFFFFFFFFFF, double 0x7FFFFFFFFFFFFFFF) + ret double %res +} diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fma.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fma.ll new file mode 100644 index 0000000000000..d52ff1ce66440 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fma.ll @@ -0,0 +1,874 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s + +; Check constant-folding for NVVM FMA intrinsics with different rounding modes + +;############################################################### +;# FMA(2.0, 3.0, 4.0) # +;############################################################### +; Tests FMA with regular numbers that produce a precise result + +define double @test_fma_2_3_4_rm_d() { +; CHECK-LABEL: define double @test_fma_2_3_4_rm_d() { +; CHECK-NEXT: ret double 1.000000e+01 +; + %res = call double @llvm.nvvm.fma.rm.d(double 2.0, double 3.0, double 4.0) + ret double %res +} + +define double @test_fma_2_3_4_rn_d() { +; CHECK-LABEL: define double @test_fma_2_3_4_rn_d() { +; CHECK-NEXT: ret double 
1.000000e+01 +; + %res = call double @llvm.nvvm.fma.rn.d(double 2.0, double 3.0, double 4.0) + ret double %res +} + +define double @test_fma_2_3_4_rp_d() { +; CHECK-LABEL: define double @test_fma_2_3_4_rp_d() { +; CHECK-NEXT: ret double 1.000000e+01 +; + %res = call double @llvm.nvvm.fma.rp.d(double 2.0, double 3.0, double 4.0) + ret double %res +} + +define double @test_fma_2_3_4_rz_d() { +; CHECK-LABEL: define double @test_fma_2_3_4_rz_d() { +; CHECK-NEXT: ret double 1.000000e+01 +; + %res = call double @llvm.nvvm.fma.rz.d(double 2.0, double 3.0, double 4.0) + ret double %res +} + +define float @test_fma_2_3_4_rm_f() { +; CHECK-LABEL: define float @test_fma_2_3_4_rm_f() { +; CHECK-NEXT: ret float 1.000000e+01 +; + %res = call float @llvm.nvvm.fma.rm.f(float 2.0, float 3.0, float 4.0) + ret float %res +} + +define float @test_fma_2_3_4_rn_f() { +; CHECK-LABEL: define float @test_fma_2_3_4_rn_f() { +; CHECK-NEXT: ret float 1.000000e+01 +; + %res = call float @llvm.nvvm.fma.rn.f(float 2.0, float 3.0, float 4.0) + ret float %res +} + +define float @test_fma_2_3_4_rp_f() { +; CHECK-LABEL: define float @test_fma_2_3_4_rp_f() { +; CHECK-NEXT: ret float 1.000000e+01 +; + %res = call float @llvm.nvvm.fma.rp.f(float 2.0, float 3.0, float 4.0) + ret float %res +} + +define float @test_fma_2_3_4_rz_f() { +; CHECK-LABEL: define float @test_fma_2_3_4_rz_f() { +; CHECK-NEXT: ret float 1.000000e+01 +; + %res = call float @llvm.nvvm.fma.rz.f(float 2.0, float 3.0, float 4.0) + ret float %res +} + +define float @test_fma_2_3_4_rm_ftz_f() { +; CHECK-LABEL: define float @test_fma_2_3_4_rm_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+01 +; + %res = call float @llvm.nvvm.fma.rm.ftz.f(float 2.0, float 3.0, float 4.0) + ret float %res +} + +define float @test_fma_2_3_4_rn_ftz_f() { +; CHECK-LABEL: define float @test_fma_2_3_4_rn_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+01 +; + %res = call float @llvm.nvvm.fma.rn.ftz.f(float 2.0, float 3.0, float 4.0) + ret float %res +} + +define 
float @test_fma_2_3_4_rp_ftz_f() { +; CHECK-LABEL: define float @test_fma_2_3_4_rp_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+01 +; + %res = call float @llvm.nvvm.fma.rp.ftz.f(float 2.0, float 3.0, float 4.0) + ret float %res +} + +define float @test_fma_2_3_4_rz_ftz_f() { +; CHECK-LABEL: define float @test_fma_2_3_4_rz_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+01 +; + %res = call float @llvm.nvvm.fma.rz.ftz.f(float 2.0, float 3.0, float 4.0) + ret float %res +} + +;############################################################### +;# FMA(Subnormal, 2.0, 0.0) -> Normal # +;############################################################### +; Tests FMA with subnormal multiplier that produces a normal result +; For float: 2^-127 * 2.0 = 2^-126 (smallest normal) +; For double: 2^-1023 * 2.0 = 2^-1022 (smallest normal) +; The FTZ variants should return 0.0, as they flush their input to zero. + +define double @test_fma_subnorm_to_norm_rm_d() { +; CHECK-LABEL: define double @test_fma_subnorm_to_norm_rm_d() { +; CHECK-NEXT: ret double 0x10000000000000 +; + %res = call double @llvm.nvvm.fma.rm.d(double 0x0008000000000000, double 2.0, double 0.0) + ret double %res +} + +define double @test_fma_subnorm_to_norm_rn_d() { +; CHECK-LABEL: define double @test_fma_subnorm_to_norm_rn_d() { +; CHECK-NEXT: ret double 0x10000000000000 +; + %res = call double @llvm.nvvm.fma.rn.d(double 0x0008000000000000, double 2.0, double 0.0) + ret double %res +} + +define double @test_fma_subnorm_to_norm_rp_d() { +; CHECK-LABEL: define double @test_fma_subnorm_to_norm_rp_d() { +; CHECK-NEXT: ret double 0x10000000000000 +; + %res = call double @llvm.nvvm.fma.rp.d(double 0x0008000000000000, double 2.0, double 0.0) + ret double %res +} + +define double @test_fma_subnorm_to_norm_rz_d() { +; CHECK-LABEL: define double @test_fma_subnorm_to_norm_rz_d() { +; CHECK-NEXT: ret double 0x10000000000000 +; + %res = call double @llvm.nvvm.fma.rz.d(double 0x0008000000000000, double 2.0, double 0.0) + ret double 
%res +} + +define float @test_fma_subnorm_to_norm_rm_f() { +; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rm_f() { +; CHECK-NEXT: ret float 0x3810000000000000 +; + %res = call float @llvm.nvvm.fma.rm.f(float 0x3800000000000000, float 2.0, float 0.0) + ret float %res +} + +define float @test_fma_subnorm_to_norm_rn_f() { +; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rn_f() { +; CHECK-NEXT: ret float 0x3810000000000000 +; + %res = call float @llvm.nvvm.fma.rn.f(float 0x3800000000000000, float 2.0, float 0.0) + ret float %res +} + +define float @test_fma_subnorm_to_norm_rp_f() { +; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rp_f() { +; CHECK-NEXT: ret float 0x3810000000000000 +; + %res = call float @llvm.nvvm.fma.rp.f(float 0x3800000000000000, float 2.0, float 0.0) + ret float %res +} + +define float @test_fma_subnorm_to_norm_rz_f() { +; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rz_f() { +; CHECK-NEXT: ret float 0x3810000000000000 +; + %res = call float @llvm.nvvm.fma.rz.f(float 0x3800000000000000, float 2.0, float 0.0) + ret float %res +} + +define float @test_fma_subnorm_to_norm_rm_ftz_f() { +; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rm_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fma.rm.ftz.f(float 0x3800000000000000, float 2.0, float 0.0) + ret float %res +} + +define float @test_fma_subnorm_to_norm_rn_ftz_f() { +; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rn_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fma.rn.ftz.f(float 0x3800000000000000, float 2.0, float 0.0) + ret float %res +} + +define float @test_fma_subnorm_to_norm_rp_ftz_f() { +; CHECK-LABEL: define float @test_fma_subnorm_to_norm_rp_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fma.rp.ftz.f(float 0x3800000000000000, float 2.0, float 0.0) + ret float %res +} + +define float @test_fma_subnorm_to_norm_rz_ftz_f() { +; CHECK-LABEL: define float 
@test_fma_subnorm_to_norm_rz_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fma.rz.ftz.f(float 0x3800000000000000, float 2.0, float 0.0) + ret float %res +} + +;############################################################### +;# FMA(Normal, 0.5, 0.0) -> Subnormal # +;############################################################### +; Tests FMA with normal inputs that produce a subnormal result +; For float: 2^-126 * 0.5 = 2^-127 (subnormal) +; For double: 2^-1022 * 0.5 = 2^-1023 (subnormal) +; With FTZ mode, the subnormal output should be flushed to zero. + +define double @test_fma_norm_to_subnorm_rm_d() { +; CHECK-LABEL: define double @test_fma_norm_to_subnorm_rm_d() { +; CHECK-NEXT: ret double 0x8000000000000 +; + %res = call double @llvm.nvvm.fma.rm.d(double 0x10000000000000, double 0.5, double 0.0) + ret double %res +} + +define double @test_fma_norm_to_subnorm_rn_d() { +; CHECK-LABEL: define double @test_fma_norm_to_subnorm_rn_d() { +; CHECK-NEXT: ret double 0x8000000000000 +; + %res = call double @llvm.nvvm.fma.rn.d(double 0x10000000000000, double 0.5, double 0.0) + ret double %res +} + +define double @test_fma_norm_to_subnorm_rp_d() { +; CHECK-LABEL: define double @test_fma_norm_to_subnorm_rp_d() { +; CHECK-NEXT: ret double 0x8000000000000 +; + %res = call double @llvm.nvvm.fma.rp.d(double 0x10000000000000, double 0.5, double 0.0) + ret double %res +} + +define double @test_fma_norm_to_subnorm_rz_d() { +; CHECK-LABEL: define double @test_fma_norm_to_subnorm_rz_d() { +; CHECK-NEXT: ret double 0x8000000000000 +; + %res = call double @llvm.nvvm.fma.rz.d(double 0x10000000000000, double 0.5, double 0.0) + ret double %res +} + +define float @test_fma_norm_to_subnorm_rm_f() { +; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rm_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.fma.rm.f(float 0x3810000000000000, float 0.5, float 0.0) + ret float %res +} + +define float 
@test_fma_norm_to_subnorm_rn_f() { +; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rn_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.fma.rn.f(float 0x3810000000000000, float 0.5, float 0.0) + ret float %res +} + +define float @test_fma_norm_to_subnorm_rp_f() { +; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rp_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.fma.rp.f(float 0x3810000000000000, float 0.5, float 0.0) + ret float %res +} + +define float @test_fma_norm_to_subnorm_rz_f() { +; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rz_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.fma.rz.f(float 0x3810000000000000, float 0.5, float 0.0) + ret float %res +} + +define float @test_fma_norm_to_subnorm_rm_ftz_f() { +; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rm_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fma.rm.ftz.f(float 0x3810000000000000, float 0.5, float 0.0) + ret float %res +} + +define float @test_fma_norm_to_subnorm_rn_ftz_f() { +; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rn_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fma.rn.ftz.f(float 0x3810000000000000, float 0.5, float 0.0) + ret float %res +} + +define float @test_fma_norm_to_subnorm_rp_ftz_f() { +; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rp_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fma.rp.ftz.f(float 0x3810000000000000, float 0.5, float 0.0) + ret float %res +} + +define float @test_fma_norm_to_subnorm_rz_ftz_f() { +; CHECK-LABEL: define float @test_fma_norm_to_subnorm_rz_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fma.rz.ftz.f(float 0x3810000000000000, float 0.5, float 0.0) + ret float %res +} + +;############################################################### +;# FMA(NaN, 1.0, 0.0) # 
+;############################################################### +; Tests FMA with NaN input to verify that the instruction is preserved +; since the result would be NaN. The instruction should not be folded. + +define double @test_fma_nan_rm_d() { +; CHECK-LABEL: define double @test_fma_nan_rm_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.fma.rm.d(double 0x7FF8000000000000, double 1.000000e+00, double 0.000000e+00) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.fma.rm.d(double 0x7FF8000000000000, double 1.0, double 0.0) + ret double %res +} + +define double @test_fma_nan_rn_d() { +; CHECK-LABEL: define double @test_fma_nan_rn_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.fma.rn.d(double 0x7FF8000000000000, double 1.000000e+00, double 0.000000e+00) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.fma.rn.d(double 0x7FF8000000000000, double 1.0, double 0.0) + ret double %res +} + +define double @test_fma_nan_rp_d() { +; CHECK-LABEL: define double @test_fma_nan_rp_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.fma.rp.d(double 0x7FF8000000000000, double 1.000000e+00, double 0.000000e+00) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.fma.rp.d(double 0x7FF8000000000000, double 1.0, double 0.0) + ret double %res +} + +define double @test_fma_nan_rz_d() { +; CHECK-LABEL: define double @test_fma_nan_rz_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.fma.rz.d(double 0x7FF8000000000000, double 1.000000e+00, double 0.000000e+00) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.fma.rz.d(double 0x7FF8000000000000, double 1.0, double 0.0) + ret double %res +} + +define float @test_fma_nan_rm_f() { +; CHECK-LABEL: define float @test_fma_nan_rm_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fma.rm.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float 
@llvm.nvvm.fma.rm.f(float 0x7FFC000000000000, float 1.0, float 0.0) + ret float %res +} + +define float @test_fma_nan_rn_f() { +; CHECK-LABEL: define float @test_fma_nan_rn_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fma.rn.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.fma.rn.f(float 0x7FFC000000000000, float 1.0, float 0.0) + ret float %res +} + +define float @test_fma_nan_rp_f() { +; CHECK-LABEL: define float @test_fma_nan_rp_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fma.rp.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.fma.rp.f(float 0x7FFC000000000000, float 1.0, float 0.0) + ret float %res +} + +define float @test_fma_nan_rz_f() { +; CHECK-LABEL: define float @test_fma_nan_rz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fma.rz.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.fma.rz.f(float 0x7FFC000000000000, float 1.0, float 0.0) + ret float %res +} + +define float @test_fma_nan_rm_ftz_f() { +; CHECK-LABEL: define float @test_fma_nan_rm_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fma.rm.ftz.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.fma.rm.ftz.f(float 0x7FFC000000000000, float 1.0, float 0.0) + ret float %res +} + +define float @test_fma_nan_rn_ftz_f() { +; CHECK-LABEL: define float @test_fma_nan_rn_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fma.rn.ftz.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.fma.rn.ftz.f(float 0x7FFC000000000000, float 1.0, float 0.0) + ret float %res +} + +define float @test_fma_nan_rp_ftz_f() { +; CHECK-LABEL: define float 
@test_fma_nan_rp_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fma.rp.ftz.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.fma.rp.ftz.f(float 0x7FFC000000000000, float 1.0, float 0.0) + ret float %res +} + +define float @test_fma_nan_rz_ftz_f() { +; CHECK-LABEL: define float @test_fma_nan_rz_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fma.rz.ftz.f(float 0x7FFC000000000000, float 1.000000e+00, float 0.000000e+00) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.fma.rz.ftz.f(float 0x7FFC000000000000, float 1.0, float 0.0) + ret float %res +} + +;############################################################### +;# FMA(1.0, 1.0, 2^(-25)) # +;############################################################### +; Tests FMA with 1.0 and 2^(-25) where different rounding modes produce different results. +; The exact result falls between 1.0 and 1.0 + 2^(-23). +; RM, RN, and RZ return 1.0, while RP returns 1.0 + 2^(-23) (0x3F800001). 
+ +define float @test_fma_1_plus_ulp_rm_f() { +; CHECK-LABEL: define float @test_fma_1_plus_ulp_rm_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rm.f(float 1.0, float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_1_plus_ulp_rn_f() { +; CHECK-LABEL: define float @test_fma_1_plus_ulp_rn_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rn.f(float 1.0, float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_1_plus_ulp_rp_f() { +; CHECK-LABEL: define float @test_fma_1_plus_ulp_rp_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.fma.rp.f(float 1.0, float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_1_plus_ulp_rz_f() { +; CHECK-LABEL: define float @test_fma_1_plus_ulp_rz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rz.f(float 1.0, float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_1_plus_ulp_rm_ftz_f() { +; CHECK-LABEL: define float @test_fma_1_plus_ulp_rm_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rm.ftz.f(float 1.0, float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_1_plus_ulp_rn_ftz_f() { +; CHECK-LABEL: define float @test_fma_1_plus_ulp_rn_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rn.ftz.f(float 1.0, float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_1_plus_ulp_rp_ftz_f() { +; CHECK-LABEL: define float @test_fma_1_plus_ulp_rp_ftz_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.fma.rp.ftz.f(float 1.0, float 1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_1_plus_ulp_rz_ftz_f() { +; CHECK-LABEL: define float @test_fma_1_plus_ulp_rz_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float 
@llvm.nvvm.fma.rz.ftz.f(float 1.0, float 1.0, float 0x3E60000000000000) + ret float %res +} + +;############################################################### +;# FMA(1.0, 1.0, 2^(-54)) # +;############################################################### +; Tests FMA with 1.0 and 2^(-54) where different rounding modes produce different results. +; The exact result falls between 1.0 and 1.0 + 2^(-52). +; - RN, RZ, RM: Return 1.0 (rounding to nearest/zero/down) +; - RP: Returns 1.0 + 2^(-52) (rounding up) + +define double @test_fma_1_plus_ulp_rm_d() { +; CHECK-LABEL: define double @test_fma_1_plus_ulp_rm_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.fma.rm.d(double 1.0, double 1.0, double 0x3C90000000000000) + ret double %res +} + +define double @test_fma_1_plus_ulp_rn_d() { +; CHECK-LABEL: define double @test_fma_1_plus_ulp_rn_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.fma.rn.d(double 1.0, double 1.0, double 0x3C90000000000000) + ret double %res +} + +define double @test_fma_1_plus_ulp_rp_d() { +; CHECK-LABEL: define double @test_fma_1_plus_ulp_rp_d() { +; CHECK-NEXT: ret double 0x3FF0000000000001 +; + %res = call double @llvm.nvvm.fma.rp.d(double 1.0, double 1.0, double 0x3C90000000000000) + ret double %res +} + +define double @test_fma_1_plus_ulp_rz_d() { +; CHECK-LABEL: define double @test_fma_1_plus_ulp_rz_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.fma.rz.d(double 1.0, double 1.0, double 0x3C90000000000000) + ret double %res +} + +;############################################################### +;# FMA(1.0, -1.0, 2^(-25)) # +;############################################################### +; Tests FMA with -1.0 and 2^(-25) where different rounding modes produce different results. +; The exact result falls between -1.0 and -1.0 + 2^(-23). 
+; - RN, RM: Return -1.0 (rounding toward nearest/down) +; - RZ, RP: Return -1.0 + 2^(-24), the next float above -1.0 (rounding toward zero/up) + +define float @test_fma_neg_1_plus_ulp_rm_f() { +; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rm_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rm.f(float 1.0, float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_neg_1_plus_ulp_rn_f() { +; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rn_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rn.f(float 1.0, float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_neg_1_plus_ulp_rp_f() { +; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rp_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFE0000000 +; + %res = call float @llvm.nvvm.fma.rp.f(float 1.0, float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_neg_1_plus_ulp_rz_f() { +; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rz_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFE0000000 +; + %res = call float @llvm.nvvm.fma.rz.f(float 1.0, float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_neg_1_plus_ulp_rm_ftz_f() { +; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rm_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rm.ftz.f(float 1.0, float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_neg_1_plus_ulp_rn_ftz_f() { +; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rn_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rn.ftz.f(float 1.0, float -1.0, float 0x3E60000000000000) + ret float %res +} + +define float @test_fma_neg_1_plus_ulp_rp_ftz_f() { +; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rp_ftz_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFE0000000 +; + %res = call float @llvm.nvvm.fma.rp.ftz.f(float 1.0, float -1.0, float 0x3E60000000000000) + 
ret float %res +} + +define float @test_fma_neg_1_plus_ulp_rz_ftz_f() { +; CHECK-LABEL: define float @test_fma_neg_1_plus_ulp_rz_ftz_f() { +; CHECK-NEXT: ret float 0xBFEFFFFFE0000000 +; + %res = call float @llvm.nvvm.fma.rz.ftz.f(float 1.0, float -1.0, float 0x3E60000000000000) + ret float %res +} + +;############################################################### +;# FMA(1.0, -1.0, 2^(-54)) # +;############################################################### +; Tests FMA with -1.0 and 2^(-54) where different rounding modes produce different results. +; The exact result falls between -1.0 and -1.0 + 2^(-53). +; - RN, RM: Return -1.0 (rounding toward nearest/down) +; - RZ, RP: Return -1.0 + 2^(-53) (rounding toward zero/up) + +define double @test_fma_neg_1_plus_ulp_rm_d() { +; CHECK-LABEL: define double @test_fma_neg_1_plus_ulp_rm_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.fma.rm.d(double 1.0, double -1.0, double 0x3C90000000000000) + ret double %res +} + +define double @test_fma_neg_1_plus_ulp_rn_d() { +; CHECK-LABEL: define double @test_fma_neg_1_plus_ulp_rn_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.fma.rn.d(double 1.0, double -1.0, double 0x3C90000000000000) + ret double %res +} + +define double @test_fma_neg_1_plus_ulp_rp_d() { +; CHECK-LABEL: define double @test_fma_neg_1_plus_ulp_rp_d() { +; CHECK-NEXT: ret double 0xBFEFFFFFFFFFFFFF +; + %res = call double @llvm.nvvm.fma.rp.d(double 1.0, double -1.0, double 0x3C90000000000000) + ret double %res +} + +define double @test_fma_neg_1_plus_ulp_rz_d() { +; CHECK-LABEL: define double @test_fma_neg_1_plus_ulp_rz_d() { +; CHECK-NEXT: ret double 0xBFEFFFFFFFFFFFFF +; + %res = call double @llvm.nvvm.fma.rz.d(double 1.0, double -1.0, double 0x3C90000000000000) + ret double %res +} + +;############################################################### +;# FMA(1.0, 1.0, -2^(-25)) # +;############################################################### +; 
Tests FMA with 1.0 and -2^(-25) where different rounding modes produce different results. +; The exact result falls between 1.0 and 1.0 - 2^(-24). +; - RN, RP: Return 1.0 (rounding toward nearest/up) +; - RZ, RM: Return 1.0 - 2^(-24) (rounding toward zero/down) + +define float @test_fma_1_minus_ulp_rm_f() { +; CHECK-LABEL: define float @test_fma_1_minus_ulp_rm_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFE0000000 +; + %res = call float @llvm.nvvm.fma.rm.f(float 1.0, float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_1_minus_ulp_rn_f() { +; CHECK-LABEL: define float @test_fma_1_minus_ulp_rn_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rn.f(float 1.0, float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_1_minus_ulp_rp_f() { +; CHECK-LABEL: define float @test_fma_1_minus_ulp_rp_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rp.f(float 1.0, float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_1_minus_ulp_rz_f() { +; CHECK-LABEL: define float @test_fma_1_minus_ulp_rz_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFE0000000 +; + %res = call float @llvm.nvvm.fma.rz.f(float 1.0, float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_1_minus_ulp_rm_ftz_f() { +; CHECK-LABEL: define float @test_fma_1_minus_ulp_rm_ftz_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFE0000000 +; + %res = call float @llvm.nvvm.fma.rm.ftz.f(float 1.0, float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_1_minus_ulp_rn_ftz_f() { +; CHECK-LABEL: define float @test_fma_1_minus_ulp_rn_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rn.ftz.f(float 1.0, float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_1_minus_ulp_rp_ftz_f() { +; CHECK-LABEL: define float @test_fma_1_minus_ulp_rp_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + 
%res = call float @llvm.nvvm.fma.rp.ftz.f(float 1.0, float 1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_1_minus_ulp_rz_ftz_f() { +; CHECK-LABEL: define float @test_fma_1_minus_ulp_rz_ftz_f() { +; CHECK-NEXT: ret float 0x3FEFFFFFE0000000 +; + %res = call float @llvm.nvvm.fma.rz.ftz.f(float 1.0, float 1.0, float 0xBE60000000000000) + ret float %res +} + +;############################################################### +;# FMA(1.0, 1.0, -2^(-54)) # +;############################################################### +; Tests FMA with 1.0 and -2^(-54) where different rounding modes produce different results. +; The exact result falls between 1.0 and 1.0 - 2^(-53). +; - RN, RP: Return 1.0 (rounding toward nearest/up) +; - RZ, RM: Return 1.0 - 2^(-53) (rounding toward zero/down) + +define double @test_fma_1_minus_ulp_rm_d() { +; CHECK-LABEL: define double @test_fma_1_minus_ulp_rm_d() { +; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFF +; + %res = call double @llvm.nvvm.fma.rm.d(double 1.0, double 1.0, double 0xBC90000000000000) + ret double %res +} + +define double @test_fma_1_minus_ulp_rn_d() { +; CHECK-LABEL: define double @test_fma_1_minus_ulp_rn_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.fma.rn.d(double 1.0, double 1.0, double 0xBC90000000000000) + ret double %res +} + +define double @test_fma_1_minus_ulp_rp_d() { +; CHECK-LABEL: define double @test_fma_1_minus_ulp_rp_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.fma.rp.d(double 1.0, double 1.0, double 0xBC90000000000000) + ret double %res +} + +define double @test_fma_1_minus_ulp_rz_d() { +; CHECK-LABEL: define double @test_fma_1_minus_ulp_rz_d() { +; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFF +; + %res = call double @llvm.nvvm.fma.rz.d(double 1.0, double 1.0, double 0xBC90000000000000) + ret double %res +} + +;############################################################### +;# FMA(1.0, -1.0, -2^(-25)) # 
+;############################################################### +; Tests FMA with -1.0 and -2^(-25) where different rounding modes produce different results. +; The exact result falls between -1.0 and -1.0 - 2^(-23). +; - RN, RZ, RP: Return -1.0 (rounding to nearest/zero/up) +; - RM: Return -1.0 - 2^(-23) (rounding down) + +define float @test_fma_neg_1_minus_ulp_rm_f() { +; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rm_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.fma.rm.f(float 1.0, float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_neg_1_minus_ulp_rn_f() { +; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rn_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rn.f(float 1.0, float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_neg_1_minus_ulp_rp_f() { +; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rp_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rp.f(float 1.0, float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_neg_1_minus_ulp_rz_f() { +; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rz.f(float 1.0, float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_neg_1_minus_ulp_rm_ftz_f() { +; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rm_ftz_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.fma.rm.ftz.f(float 1.0, float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_neg_1_minus_ulp_rn_ftz_f() { +; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rn_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rn.ftz.f(float 1.0, float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float 
@test_fma_neg_1_minus_ulp_rp_ftz_f() { +; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rp_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rp.ftz.f(float 1.0, float -1.0, float 0xBE60000000000000) + ret float %res +} + +define float @test_fma_neg_1_minus_ulp_rz_ftz_f() { +; CHECK-LABEL: define float @test_fma_neg_1_minus_ulp_rz_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.fma.rz.ftz.f(float 1.0, float -1.0, float 0xBE60000000000000) + ret float %res +} + +;############################################################### +;# FMA(1.0, -1.0, -2^(-54)) # +;############################################################### +; Tests FMA with -1.0 and -2^(-54) where different rounding modes produce different results. +; The exact result falls between -1.0 and -1.0 - 2^(-52). +; - RN, RZ, RP: Return -1.0 (rounding to nearest/zero/up) +; - RM: Return -1.0 - 2^(-52) (rounding down) + +define double @test_fma_neg_1_minus_ulp_rm_d() { +; CHECK-LABEL: define double @test_fma_neg_1_minus_ulp_rm_d() { +; CHECK-NEXT: ret double 0xBFF0000000000001 +; + %res = call double @llvm.nvvm.fma.rm.d(double 1.0, double -1.0, double 0xBC90000000000000) + ret double %res +} + +define double @test_fma_neg_1_minus_ulp_rn_d() { +; CHECK-LABEL: define double @test_fma_neg_1_minus_ulp_rn_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.fma.rn.d(double 1.0, double -1.0, double 0xBC90000000000000) + ret double %res +} + +define double @test_fma_neg_1_minus_ulp_rp_d() { +; CHECK-LABEL: define double @test_fma_neg_1_minus_ulp_rp_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.fma.rp.d(double 1.0, double -1.0, double 0xBC90000000000000) + ret double %res +} + +define double @test_fma_neg_1_minus_ulp_rz_d() { +; CHECK-LABEL: define double @test_fma_neg_1_minus_ulp_rz_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.fma.rz.d(double 
1.0, double -1.0, double 0xBC90000000000000) + ret double %res +} diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-mul.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-mul.ll new file mode 100644 index 0000000000000..12391e8bf0631 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-mul.ll @@ -0,0 +1,994 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s + +; Check constant-folding for NVVM multiply intrinsics with different rounding modes + +;############################################################### +;# Mul(1.25, 2.0) # +;############################################################### +; Tests multiplication of two normal numbers (1.25 and 2.0) where the result +; is exactly representable. All rounding modes should produce the same result. + +define double @test_1_25_times_2_rm_d() { +; CHECK-LABEL: define double @test_1_25_times_2_rm_d() { +; CHECK-NEXT: ret double 2.500000e+00 +; + %res = call double @llvm.nvvm.mul.rm.d(double 1.25, double 2.0) + ret double %res +} + +define double @test_1_25_times_2_rn_d() { +; CHECK-LABEL: define double @test_1_25_times_2_rn_d() { +; CHECK-NEXT: ret double 2.500000e+00 +; + %res = call double @llvm.nvvm.mul.rn.d(double 1.25, double 2.0) + ret double %res +} + +define double @test_1_25_times_2_rp_d() { +; CHECK-LABEL: define double @test_1_25_times_2_rp_d() { +; CHECK-NEXT: ret double 2.500000e+00 +; + %res = call double @llvm.nvvm.mul.rp.d(double 1.25, double 2.0) + ret double %res +} + +define double @test_1_25_times_2_rz_d() { +; CHECK-LABEL: define double @test_1_25_times_2_rz_d() { +; CHECK-NEXT: ret double 2.500000e+00 +; + %res = call double @llvm.nvvm.mul.rz.d(double 1.25, double 2.0) + ret double %res +} + +define float @test_1_25_times_2_rm_f() { +; CHECK-LABEL: define float @test_1_25_times_2_rm_f() { +; CHECK-NEXT: ret float 
2.500000e+00 +; + %res = call float @llvm.nvvm.mul.rm.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_times_2_rn_f() { +; CHECK-LABEL: define float @test_1_25_times_2_rn_f() { +; CHECK-NEXT: ret float 2.500000e+00 +; + %res = call float @llvm.nvvm.mul.rn.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_times_2_rp_f() { +; CHECK-LABEL: define float @test_1_25_times_2_rp_f() { +; CHECK-NEXT: ret float 2.500000e+00 +; + %res = call float @llvm.nvvm.mul.rp.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_times_2_rz_f() { +; CHECK-LABEL: define float @test_1_25_times_2_rz_f() { +; CHECK-NEXT: ret float 2.500000e+00 +; + %res = call float @llvm.nvvm.mul.rz.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_times_2_rm_ftz_f() { +; CHECK-LABEL: define float @test_1_25_times_2_rm_ftz_f() { +; CHECK-NEXT: ret float 2.500000e+00 +; + %res = call float @llvm.nvvm.mul.rm.ftz.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_times_2_rn_ftz_f() { +; CHECK-LABEL: define float @test_1_25_times_2_rn_ftz_f() { +; CHECK-NEXT: ret float 2.500000e+00 +; + %res = call float @llvm.nvvm.mul.rn.ftz.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_times_2_rp_ftz_f() { +; CHECK-LABEL: define float @test_1_25_times_2_rp_ftz_f() { +; CHECK-NEXT: ret float 2.500000e+00 +; + %res = call float @llvm.nvvm.mul.rp.ftz.f(float 1.25, float 2.0) + ret float %res +} + +define float @test_1_25_times_2_rz_ftz_f() { +; CHECK-LABEL: define float @test_1_25_times_2_rz_ftz_f() { +; CHECK-NEXT: ret float 2.500000e+00 +; + %res = call float @llvm.nvvm.mul.rz.ftz.f(float 1.25, float 2.0) + ret float %res +} + +;############################################################### +;# Mul(1.0, Subnormal) # +;############################################################### +; Tests multiplication of 1.0 by a subnormal number to verify FTZ behavior. 
+; For float, we use 2^-149 (smallest subnormal float). +; For double, we use 2^-1074 (smallest subnormal double). +; Without FTZ, the result should be the subnormal number. +; With FTZ, the result should be 0.0. + +define double @test_1_times_subnorm_rm_d() { +; CHECK-LABEL: define double @test_1_times_subnorm_rm_d() { +; CHECK-NEXT: ret double 4.940660e-324 +; + %res = call double @llvm.nvvm.mul.rm.d(double 1.0, double 0x0000000000000001) + ret double %res +} + +define double @test_1_times_subnorm_rn_d() { +; CHECK-LABEL: define double @test_1_times_subnorm_rn_d() { +; CHECK-NEXT: ret double 4.940660e-324 +; + %res = call double @llvm.nvvm.mul.rn.d(double 1.0, double 0x0000000000000001) + ret double %res +} + +define double @test_1_times_subnorm_rp_d() { +; CHECK-LABEL: define double @test_1_times_subnorm_rp_d() { +; CHECK-NEXT: ret double 4.940660e-324 +; + %res = call double @llvm.nvvm.mul.rp.d(double 1.0, double 0x0000000000000001) + ret double %res +} + +define double @test_1_times_subnorm_rz_d() { +; CHECK-LABEL: define double @test_1_times_subnorm_rz_d() { +; CHECK-NEXT: ret double 4.940660e-324 +; + %res = call double @llvm.nvvm.mul.rz.d(double 1.0, double 0x0000000000000001) + ret double %res +} + +define float @test_1_times_subnorm_rm_f() { +; CHECK-LABEL: define float @test_1_times_subnorm_rm_f() { +; CHECK-NEXT: ret float 0x36A0000000000000 +; + %res = call float @llvm.nvvm.mul.rm.f(float 1.0, float 0x36A0000000000000) + ret float %res +} + +define float @test_1_times_subnorm_rn_f() { +; CHECK-LABEL: define float @test_1_times_subnorm_rn_f() { +; CHECK-NEXT: ret float 0x36A0000000000000 +; + %res = call float @llvm.nvvm.mul.rn.f(float 1.0, float 0x36A0000000000000) + ret float %res +} + +define float @test_1_times_subnorm_rp_f() { +; CHECK-LABEL: define float @test_1_times_subnorm_rp_f() { +; CHECK-NEXT: ret float 0x36A0000000000000 +; + %res = call float @llvm.nvvm.mul.rp.f(float 1.0, float 0x36A0000000000000) + ret float %res +} + +define float 
@test_1_times_subnorm_rz_f() { +; CHECK-LABEL: define float @test_1_times_subnorm_rz_f() { +; CHECK-NEXT: ret float 0x36A0000000000000 +; + %res = call float @llvm.nvvm.mul.rz.f(float 1.0, float 0x36A0000000000000) + ret float %res +} + +define float @test_1_times_subnorm_rm_ftz_f() { +; CHECK-LABEL: define float @test_1_times_subnorm_rm_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.mul.rm.ftz.f(float 1.0, float 0x36A0000000000000) + ret float %res +} + +define float @test_1_times_subnorm_rn_ftz_f() { +; CHECK-LABEL: define float @test_1_times_subnorm_rn_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.mul.rn.ftz.f(float 1.0, float 0x36A0000000000000) + ret float %res +} + +define float @test_1_times_subnorm_rp_ftz_f() { +; CHECK-LABEL: define float @test_1_times_subnorm_rp_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.mul.rp.ftz.f(float 1.0, float 0x36A0000000000000) + ret float %res +} + +define float @test_1_times_subnorm_rz_ftz_f() { +; CHECK-LABEL: define float @test_1_times_subnorm_rz_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.mul.rz.ftz.f(float 1.0, float 0x36A0000000000000) + ret float %res +} + +;############################################################### +;# Mul(1.0, -Subnormal) # +;############################################################### +; Tests multiplication of 1.0 by a negative subnormal number to verify FTZ behavior. +; For float, we use -2^-149 (negative smallest subnormal). +; For double, we use -2^-1074 (negative smallest subnormal). +; Without FTZ, the result should be the negative subnormal number. +; With FTZ, the result should be -0.0. 
+ +define double @test_1_times_neg_subnorm_rm_d() { +; CHECK-LABEL: define double @test_1_times_neg_subnorm_rm_d() { +; CHECK-NEXT: ret double -4.940660e-324 +; + %res = call double @llvm.nvvm.mul.rm.d(double 1.0, double 0x8000000000000001) + ret double %res +} + +define double @test_1_times_neg_subnorm_rn_d() { +; CHECK-LABEL: define double @test_1_times_neg_subnorm_rn_d() { +; CHECK-NEXT: ret double -4.940660e-324 +; + %res = call double @llvm.nvvm.mul.rn.d(double 1.0, double 0x8000000000000001) + ret double %res +} + +define double @test_1_times_neg_subnorm_rp_d() { +; CHECK-LABEL: define double @test_1_times_neg_subnorm_rp_d() { +; CHECK-NEXT: ret double -4.940660e-324 +; + %res = call double @llvm.nvvm.mul.rp.d(double 1.0, double 0x8000000000000001) + ret double %res +} + +define double @test_1_times_neg_subnorm_rz_d() { +; CHECK-LABEL: define double @test_1_times_neg_subnorm_rz_d() { +; CHECK-NEXT: ret double -4.940660e-324 +; + %res = call double @llvm.nvvm.mul.rz.d(double 1.0, double 0x8000000000000001) + ret double %res +} + +define float @test_1_times_neg_subnorm_rm_f() { +; CHECK-LABEL: define float @test_1_times_neg_subnorm_rm_f() { +; CHECK-NEXT: ret float 0xB6A0000000000000 +; + %res = call float @llvm.nvvm.mul.rm.f(float 1.0, float 0xB6A0000000000000) + ret float %res +} + +define float @test_1_times_neg_subnorm_rn_f() { +; CHECK-LABEL: define float @test_1_times_neg_subnorm_rn_f() { +; CHECK-NEXT: ret float 0xB6A0000000000000 +; + %res = call float @llvm.nvvm.mul.rn.f(float 1.0, float 0xB6A0000000000000) + ret float %res +} + +define float @test_1_times_neg_subnorm_rp_f() { +; CHECK-LABEL: define float @test_1_times_neg_subnorm_rp_f() { +; CHECK-NEXT: ret float 0xB6A0000000000000 +; + %res = call float @llvm.nvvm.mul.rp.f(float 1.0, float 0xB6A0000000000000) + ret float %res +} + +define float @test_1_times_neg_subnorm_rz_f() { +; CHECK-LABEL: define float @test_1_times_neg_subnorm_rz_f() { +; CHECK-NEXT: ret float 0xB6A0000000000000 +; + %res = 
call float @llvm.nvvm.mul.rz.f(float 1.0, float 0xB6A0000000000000) + ret float %res +} + +define float @test_1_times_neg_subnorm_rm_ftz_f() { +; CHECK-LABEL: define float @test_1_times_neg_subnorm_rm_ftz_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.mul.rm.ftz.f(float 1.0, float 0xB6A0000000000000) + ret float %res +} + +define float @test_1_times_neg_subnorm_rn_ftz_f() { +; CHECK-LABEL: define float @test_1_times_neg_subnorm_rn_ftz_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.mul.rn.ftz.f(float 1.0, float 0xB6A0000000000000) + ret float %res +} + +define float @test_1_times_neg_subnorm_rp_ftz_f() { +; CHECK-LABEL: define float @test_1_times_neg_subnorm_rp_ftz_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.mul.rp.ftz.f(float 1.0, float 0xB6A0000000000000) + ret float %res +} + +define float @test_1_times_neg_subnorm_rz_ftz_f() { +; CHECK-LABEL: define float @test_1_times_neg_subnorm_rz_ftz_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.mul.rz.ftz.f(float 1.0, float 0xB6A0000000000000) + ret float %res +} + +;############################################################### +;# Mul(Normal, Normal) -> Subnormal # +;############################################################### +; Tests multiplication of two normal numbers that produces a subnormal result. +; We multiply the smallest normal float (2^-126 = 0x3810000000000000) by 0.5 to get 2^-127, +; which is subnormal. This tests the transition from normal to subnormal numbers. +; For double precision the same inputs are reused; 2^-127 is a normal double and there is no FTZ variant, so every rounding mode returns exactly 2^-127. 
+ +define double @test_normal_times_normal_to_subnorm_rm_d() { +; CHECK-LABEL: define double @test_normal_times_normal_to_subnorm_rm_d() { +; CHECK-NEXT: ret double 0x3800000000000000 +; + %res = call double @llvm.nvvm.mul.rm.d(double 0x3810000000000000, double 0.5) + ret double %res +} + +define double @test_normal_times_normal_to_subnorm_rn_d() { +; CHECK-LABEL: define double @test_normal_times_normal_to_subnorm_rn_d() { +; CHECK-NEXT: ret double 0x3800000000000000 +; + %res = call double @llvm.nvvm.mul.rn.d(double 0x3810000000000000, double 0.5) + ret double %res +} + +define double @test_normal_times_normal_to_subnorm_rp_d() { +; CHECK-LABEL: define double @test_normal_times_normal_to_subnorm_rp_d() { +; CHECK-NEXT: ret double 0x3800000000000000 +; + %res = call double @llvm.nvvm.mul.rp.d(double 0x3810000000000000, double 0.5) + ret double %res +} + +define double @test_normal_times_normal_to_subnorm_rz_d() { +; CHECK-LABEL: define double @test_normal_times_normal_to_subnorm_rz_d() { +; CHECK-NEXT: ret double 0x3800000000000000 +; + %res = call double @llvm.nvvm.mul.rz.d(double 0x3810000000000000, double 0.5) + ret double %res +} + +define float @test_normal_times_normal_to_subnorm_rm_f() { +; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rm_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.mul.rm.f(float 0x3810000000000000, float 0.5) + ret float %res +} + +define float @test_normal_times_normal_to_subnorm_rn_f() { +; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rn_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.mul.rn.f(float 0x3810000000000000, float 0.5) + ret float %res +} + +define float @test_normal_times_normal_to_subnorm_rp_f() { +; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rp_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.mul.rp.f(float 0x3810000000000000, float 0.5) + ret float %res +} + 
+define float @test_normal_times_normal_to_subnorm_rz_f() { +; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rz_f() { +; CHECK-NEXT: ret float 0x3800000000000000 +; + %res = call float @llvm.nvvm.mul.rz.f(float 0x3810000000000000, float 0.5) + ret float %res +} + +define float @test_normal_times_normal_to_subnorm_rm_ftz_f() { +; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rm_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.mul.rm.ftz.f(float 0x3810000000000000, float 0.5) + ret float %res +} + +define float @test_normal_times_normal_to_subnorm_rn_ftz_f() { +; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rn_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.mul.rn.ftz.f(float 0x3810000000000000, float 0.5) + ret float %res +} + +define float @test_normal_times_normal_to_subnorm_rp_ftz_f() { +; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rp_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.mul.rp.ftz.f(float 0x3810000000000000, float 0.5) + ret float %res +} + +define float @test_normal_times_normal_to_subnorm_rz_ftz_f() { +; CHECK-LABEL: define float @test_normal_times_normal_to_subnorm_rz_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.mul.rz.ftz.f(float 0x3810000000000000, float 0.5) + ret float %res +} + +;############################################################### +;# Mul(2.0, NaN) # +;############################################################### +; Tests multiplication with NaN to verify that we do not fold these, +; as host and device NaNs may be different. 
+ +define double @test_2_times_nan_rm_d() { +; CHECK-LABEL: define double @test_2_times_nan_rm_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.mul.rm.d(double 2.000000e+00, double 0x7FF4444400000000) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.mul.rm.d(double 2.0, double 0x7FF4444400000000) + ret double %res +} + +define double @test_2_times_nan_rn_d() { +; CHECK-LABEL: define double @test_2_times_nan_rn_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.mul.rn.d(double 2.000000e+00, double 0x7FF4444400000000) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.mul.rn.d(double 2.0, double 0x7FF4444400000000) + ret double %res +} + +define double @test_2_times_nan_rp_d() { +; CHECK-LABEL: define double @test_2_times_nan_rp_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.mul.rp.d(double 2.000000e+00, double 0x7FF4444400000000) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.mul.rp.d(double 2.0, double 0x7FF4444400000000) + ret double %res +} + +define double @test_2_times_nan_rz_d() { +; CHECK-LABEL: define double @test_2_times_nan_rz_d() { +; CHECK-NEXT: [[RES:%.*]] = call double @llvm.nvvm.mul.rz.d(double 2.000000e+00, double 0x7FF4444400000000) +; CHECK-NEXT: ret double [[RES]] +; + %res = call double @llvm.nvvm.mul.rz.d(double 2.0, double 0x7FF4444400000000) + ret double %res +} + +define float @test_2_times_nan_rm_f() { +; CHECK-LABEL: define float @test_2_times_nan_rm_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.mul.rm.f(float 2.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.mul.rm.f(float 2.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_2_times_nan_rn_f() { +; CHECK-LABEL: define float @test_2_times_nan_rn_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.mul.rn.f(float 2.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float 
@llvm.nvvm.mul.rn.f(float 2.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_2_times_nan_rp_f() { +; CHECK-LABEL: define float @test_2_times_nan_rp_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.mul.rp.f(float 2.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.mul.rp.f(float 2.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_2_times_nan_rz_f() { +; CHECK-LABEL: define float @test_2_times_nan_rz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.mul.rz.f(float 2.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.mul.rz.f(float 2.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_2_times_nan_rm_ftz_f() { +; CHECK-LABEL: define float @test_2_times_nan_rm_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.mul.rm.ftz.f(float 2.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.mul.rm.ftz.f(float 2.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_2_times_nan_rn_ftz_f() { +; CHECK-LABEL: define float @test_2_times_nan_rn_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.mul.rn.ftz.f(float 2.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.mul.rn.ftz.f(float 2.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_2_times_nan_rp_ftz_f() { +; CHECK-LABEL: define float @test_2_times_nan_rp_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.mul.rp.ftz.f(float 2.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.mul.rp.ftz.f(float 2.0, float 0x7FFF444400000000) + ret float %res +} + +define float @test_2_times_nan_rz_ftz_f() { +; CHECK-LABEL: define float @test_2_times_nan_rz_ftz_f() { +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.mul.rz.ftz.f(float 
2.000000e+00, float 0x7FFF444400000000) +; CHECK-NEXT: ret float [[RES]] +; + %res = call float @llvm.nvvm.mul.rz.ftz.f(float 2.0, float 0x7FFF444400000000) + ret float %res +} + +;############################################################### +;# Mul(0.75, 4/3 + epsilon) # +;############################################################### +; Tests multiplication of 0.75 (3/4) by a value slightly above 4/3, +; where different rounding modes produce different results. +; The exact result would be 1.0, but since 4/3 cannot be exactly encoded +; as a float, the calculated result falls between 1.0 and 1.0 + 2^-23. +; - RN, RZ, RM round to 1.0 (rounding to nearest/zero/down) +; - RP rounds to 1.0 + 2^-23 (rounding up) + +define float @test_mul_just_above_1_rm_f() { +; CHECK-LABEL: define float @test_mul_just_above_1_rm_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rm.f(float 0.75, float 0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_above_1_rn_f() { +; CHECK-LABEL: define float @test_mul_just_above_1_rn_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rn.f(float 0.75, float 0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_above_1_rp_f() { +; CHECK-LABEL: define float @test_mul_just_above_1_rp_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.mul.rp.f(float 0.75, float 0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_above_1_rz_f() { +; CHECK-LABEL: define float @test_mul_just_above_1_rz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rz.f(float 0.75, float 0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_above_1_rm_ftz_f() { +; CHECK-LABEL: define float @test_mul_just_above_1_rm_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rm.ftz.f(float 0.75, float 0x3FF5555560000000) + ret float %res +} + +define float 
@test_mul_just_above_1_rn_ftz_f() { +; CHECK-LABEL: define float @test_mul_just_above_1_rn_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rn.ftz.f(float 0.75, float 0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_above_1_rp_ftz_f() { +; CHECK-LABEL: define float @test_mul_just_above_1_rp_ftz_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.mul.rp.ftz.f(float 0.75, float 0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_above_1_rz_ftz_f() { +; CHECK-LABEL: define float @test_mul_just_above_1_rz_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rz.ftz.f(float 0.75, float 0x3FF5555560000000) + ret float %res +} + +;############################################################### +;# Mul(0.75, 4/3 + epsilon) # +;############################################################### +; Tests multiplication of 0.75 by a value slightly above 4/3, +; where different rounding modes produce different results. +; The exact result would be 1.0, but since 4/3 cannot be exactly encoded +; as a double, the calculated result falls between 1.0 and 1.0 + 2^-52. 
+; - RN, RZ, RM round to 1.0 (rounding to nearest/zero/down) +; - RP rounds to 1.0 + 2^-52 (rounding up) + +define double @test_mul_just_above_1_rm_d() { +; CHECK-LABEL: define double @test_mul_just_above_1_rm_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.mul.rm.d(double 0.75, double 0x3FF5555555555556) + ret double %res +} + +define double @test_mul_just_above_1_rn_d() { +; CHECK-LABEL: define double @test_mul_just_above_1_rn_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.mul.rn.d(double 0.75, double 0x3FF5555555555556) + ret double %res +} + +define double @test_mul_just_above_1_rp_d() { +; CHECK-LABEL: define double @test_mul_just_above_1_rp_d() { +; CHECK-NEXT: ret double 0x3FF0000000000001 +; + %res = call double @llvm.nvvm.mul.rp.d(double 0.75, double 0x3FF5555555555556) + ret double %res +} + +define double @test_mul_just_above_1_rz_d() { +; CHECK-LABEL: define double @test_mul_just_above_1_rz_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.mul.rz.d(double 0.75, double 0x3FF5555555555556) + ret double %res +} + +;############################################################### +;# Mul(-0.75, 4/3 + epsilon) # +;############################################################### +; Tests multiplication of -0.75 by a value slightly above 4/3, +; where different rounding modes produce different results. +; The exact result would be -1.0, but since 4/3 cannot be exactly encoded +; as a float, the calculated result falls between -1.0 and -1.0 - 2^-23.
+; - RN, RZ, RP round to -1.0 (rounding to nearest/zero/up) +; - RM rounds to -1.0 - 2^-23 (rounding down) + +define float @test_mul_just_below_negative_1_rm_f() { +; CHECK-LABEL: define float @test_mul_just_below_negative_1_rm_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.mul.rm.f(float -0.75, float 0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_below_negative_1_rn_f() { +; CHECK-LABEL: define float @test_mul_just_below_negative_1_rn_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rn.f(float -0.75, float 0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_below_negative_1_rp_f() { +; CHECK-LABEL: define float @test_mul_just_below_negative_1_rp_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rp.f(float -0.75, float 0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_below_negative_1_rz_f() { +; CHECK-LABEL: define float @test_mul_just_below_negative_1_rz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rz.f(float -0.75, float 0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_below_negative_1_rm_ftz_f() { +; CHECK-LABEL: define float @test_mul_just_below_negative_1_rm_ftz_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.mul.rm.ftz.f(float -0.75, float 0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_below_negative_1_rn_ftz_f() { +; CHECK-LABEL: define float @test_mul_just_below_negative_1_rn_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rn.ftz.f(float -0.75, float 0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_below_negative_1_rp_ftz_f() { +; CHECK-LABEL: define float @test_mul_just_below_negative_1_rp_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rp.ftz.f(float -0.75, float 
0x3FF5555560000000) + ret float %res +} + +define float @test_mul_just_below_negative_1_rz_ftz_f() { +; CHECK-LABEL: define float @test_mul_just_below_negative_1_rz_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rz.ftz.f(float -0.75, float 0x3FF5555560000000) + ret float %res +} + +;############################################################### +;# Mul(-0.75, 4/3 + epsilon) # +;############################################################### +; Tests multiplication of -0.75 by a value slightly above 4/3, +; where different rounding modes produce different results. +; The exact result would be -1.0, but since 4/3 cannot be exactly encoded +; as a double, the calculated result falls between -1.0 and -1.0 - 2^-52. +; - RN, RZ, RP round to -1.0 (rounding to nearest/zero/up) +; - RM rounds to -1.0 - 2^-52 (rounding down) + +define double @test_mul_just_below_negative_1_rm_d() { +; CHECK-LABEL: define double @test_mul_just_below_negative_1_rm_d() { +; CHECK-NEXT: ret double 0xBFF0000000000001 +; + %res = call double @llvm.nvvm.mul.rm.d(double -0.75, double 0x3FF5555555555556) + ret double %res +} + +define double @test_mul_just_below_negative_1_rn_d() { +; CHECK-LABEL: define double @test_mul_just_below_negative_1_rn_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.mul.rn.d(double -0.75, double 0x3FF5555555555556) + ret double %res +} + +define double @test_mul_just_below_negative_1_rp_d() { +; CHECK-LABEL: define double @test_mul_just_below_negative_1_rp_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.mul.rp.d(double -0.75, double 0x3FF5555555555556) + ret double %res +} + +define double @test_mul_just_below_negative_1_rz_d() { +; CHECK-LABEL: define double @test_mul_just_below_negative_1_rz_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.mul.rz.d(double -0.75, double 0x3FF5555555555556) + ret double %res +} + 
+;############################################################### +;# Mul(0.625, 1.6 + epsilon) # +;############################################################### +; Tests multiplication of 5/8 * ~8/5 with different rounding modes. +; Multiply 0.625 (5/8) by a value very slightly above 8/5 = 1.6 + epsilon. +; The exact result is between 1.0 and 1.0 + 2^-23 +; - RN, RP round to 1.0 + 2^-23 (rounding towards nearest/up) +; - RZ, RM round to 1.0 (rounding towards zero/down) + +define float @test_mul_slightly_more_above_1_rm_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rm_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rm.f(float 0x3FE4000000000000, float 0x3FF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_above_1_rn_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rn_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.mul.rn.f(float 0x3FE4000000000000, float 0x3FF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_above_1_rp_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rp_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.mul.rp.f(float 0x3FE4000000000000, float 0x3FF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_above_1_rz_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rz.f(float 0x3FE4000000000000, float 0x3FF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_above_1_rm_ftz_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rm_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rm.ftz.f(float 0x3FE4000000000000, float 0x3FF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_above_1_rn_ftz_f() { +; CHECK-LABEL: define float
@test_mul_slightly_more_above_1_rn_ftz_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.mul.rn.ftz.f(float 0x3FE4000000000000, float 0x3FF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_above_1_rp_ftz_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rp_ftz_f() { +; CHECK-NEXT: ret float 0x3FF0000020000000 +; + %res = call float @llvm.nvvm.mul.rp.ftz.f(float 0x3FE4000000000000, float 0x3FF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_above_1_rz_ftz_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_above_1_rz_ftz_f() { +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rz.ftz.f(float 0x3FE4000000000000, float 0x3FF99999C0000000) + ret float %res +} + +;############################################################### +;# Mul(0.625, 1.6 + epsilon) # +;############################################################### +; Tests multiplication of 5/8 * ~8/5 with different rounding modes. +; Multiply 0.625 (5/8) by a value very slightly above 8/5 = 1.6 + epsilon.
+; The exact result is between 1.0 and 1.0 + 2^-52 +; - RN, RP round to 1.0 + 2^-52 (rounding towards nearest/up) +; - RZ, RM round to 1.0 (rounding towards zero/down) + +define double @test_mul_slightly_more_above_1_rm_d() { +; CHECK-LABEL: define double @test_mul_slightly_more_above_1_rm_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.mul.rm.d(double 0x3FE4000000000000, double 0x3FF999999999999B) + ret double %res +} + +define double @test_mul_slightly_more_above_1_rn_d() { +; CHECK-LABEL: define double @test_mul_slightly_more_above_1_rn_d() { +; CHECK-NEXT: ret double 0x3FF0000000000001 +; + %res = call double @llvm.nvvm.mul.rn.d(double 0x3FE4000000000000, double 0x3FF999999999999B) + ret double %res +} + +define double @test_mul_slightly_more_above_1_rp_d() { +; CHECK-LABEL: define double @test_mul_slightly_more_above_1_rp_d() { +; CHECK-NEXT: ret double 0x3FF0000000000001 +; + %res = call double @llvm.nvvm.mul.rp.d(double 0x3FE4000000000000, double 0x3FF999999999999B) + ret double %res +} + +define double @test_mul_slightly_more_above_1_rz_d() { +; CHECK-LABEL: define double @test_mul_slightly_more_above_1_rz_d() { +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = call double @llvm.nvvm.mul.rz.d(double 0x3FE4000000000000, double 0x3FF999999999999B) + ret double %res +} + +;############################################################### +;# Mul(0.625, -(1.6 + epsilon)) # +;############################################################### +; Tests multiplication of 5/8 * ~-8/5 with different rounding modes. +; Multiply 0.625 (5/8) by a value very slightly below -8/5 = -(1.6 + epsilon).
+; The exact result is between -1.0 and -1.0 - 2^-23 +; - RN, RM round to -1.0 - 2^-23 (rounding towards nearest/down) +; - RZ, RP round to -1.0 (rounding towards zero/up) + +define float @test_mul_slightly_more_below_negative_1_rm_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rm_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.mul.rm.f(float 0x3FE4000000000000, float 0xBFF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_below_negative_1_rn_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rn_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.mul.rn.f(float 0x3FE4000000000000, float 0xBFF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_below_negative_1_rp_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rp_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rp.f(float 0x3FE4000000000000, float 0xBFF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_below_negative_1_rz_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rz.f(float 0x3FE4000000000000, float 0xBFF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_below_negative_1_rm_ftz_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rm_ftz_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.mul.rm.ftz.f(float 0x3FE4000000000000, float 0xBFF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_below_negative_1_rn_ftz_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rn_ftz_f() { +; CHECK-NEXT: ret float 0xBFF0000020000000 +; + %res = call float @llvm.nvvm.mul.rn.ftz.f(float 0x3FE4000000000000, float 0xBFF99999C0000000) + ret
float %res +} + +define float @test_mul_slightly_more_below_negative_1_rp_ftz_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rp_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rp.ftz.f(float 0x3FE4000000000000, float 0xBFF99999C0000000) + ret float %res +} + +define float @test_mul_slightly_more_below_negative_1_rz_ftz_f() { +; CHECK-LABEL: define float @test_mul_slightly_more_below_negative_1_rz_ftz_f() { +; CHECK-NEXT: ret float -1.000000e+00 +; + %res = call float @llvm.nvvm.mul.rz.ftz.f(float 0x3FE4000000000000, float 0xBFF99999C0000000) + ret float %res +} + +;############################################################### +;# Mul(0.625, -(1.6 + epsilon)) # +;############################################################### +; Tests multiplication of 5/8 * ~-8/5 with different rounding modes. +; Multiply 0.625 (5/8) by a value very slightly below -8/5 = -(1.6 + epsilon). +; The exact result is between -1.0 and -1.0 - 2^-52 +; - RN, RM round to -1.0 - 2^-52 (rounding towards nearest/down) +; - RZ, RP round to -1.0 (rounding towards zero/up) + +define double @test_mul_slightly_more_below_negative_1_rm_d() { +; CHECK-LABEL: define double @test_mul_slightly_more_below_negative_1_rm_d() { +; CHECK-NEXT: ret double 0xBFF0000000000001 +; + %res = call double @llvm.nvvm.mul.rm.d(double 0x3FE4000000000000, double 0xBFF999999999999B) + ret double %res +} + +define double @test_mul_slightly_more_below_negative_1_rn_d() { +; CHECK-LABEL: define double @test_mul_slightly_more_below_negative_1_rn_d() { +; CHECK-NEXT: ret double 0xBFF0000000000001 +; + %res = call double @llvm.nvvm.mul.rn.d(double 0x3FE4000000000000, double 0xBFF999999999999B) + ret double %res +} + +define double @test_mul_slightly_more_below_negative_1_rp_d() { +; CHECK-LABEL: define double @test_mul_slightly_more_below_negative_1_rp_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.mul.rp.d(double
0x3FE4000000000000, double 0xBFF999999999999B) + ret double %res +} + +define double @test_mul_slightly_more_below_negative_1_rz_d() { +; CHECK-LABEL: define double @test_mul_slightly_more_below_negative_1_rz_d() { +; CHECK-NEXT: ret double -1.000000e+00 +; + %res = call double @llvm.nvvm.mul.rz.d(double 0x3FE4000000000000, double 0xBFF999999999999B) + ret double %res +}