Select relaxed min/max for pmin/pmax-shaped patterns when relaxed SIMD is on.

WebAssemblyInstrSIMD.td
  * Factor the two integer-typed vselect/setolt patterns into the multiclass
    PMinMaxInt<vec, baseMinInst, baseMaxInst> and instantiate it from the
    existing foreach with the PMIN_* / PMAX_* instructions.
  * Inside the HasRelaxedSIMD block, add AddedComplexity = 1 patterns that map
    the pmin / pmax fragments, and the PMinMaxInt patterns, to relaxed_min /
    relaxed_max.

simd-relaxed-fmax.ll, simd-relaxed-fmin.ll
  * New f32x4 / f64x2 cases (ordered compares, `fast` compares and
    integer-typed selects) that expect relaxed_max / relaxed_min.

NOTE(review): the template arguments on PMinMaxInt, on its two instantiations
and on the !cast calls were missing from this copy of the patch and have been
restored. The context line `defm PMAX : SIMDBinaryFP;` looks truncated in the
same way, but its arguments are not visible here, so it is left untouched.
Leading indentation was also lost and has been re-created by convention.
Confirm both against the tree before applying.

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index f0ac26b8edec8..14097d7b40a9c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1336,22 +1336,25 @@ def pmax : PatFrags<(ops node:$lhs, node:$rhs), [
 ]>;
 defm PMAX : SIMDBinaryFP;
 
+multiclass PMinMaxInt<Vec vec, NI baseMinInst, NI baseMaxInst> {
+  def : Pat<(vec.int_vt (vselect
+              (setolt (vec.vt (bitconvert V128:$rhs)),
+                      (vec.vt (bitconvert V128:$lhs))),
+              V128:$rhs, V128:$lhs)),
+            (baseMinInst $lhs, $rhs)>;
+  def : Pat<(vec.int_vt (vselect
+              (setolt (vec.vt (bitconvert V128:$lhs)),
+                      (vec.vt (bitconvert V128:$rhs))),
+              V128:$rhs, V128:$lhs)),
+            (baseMaxInst $lhs, $rhs)>;
+}
 // Also match the pmin/pmax cases where the operands are int vectors (but the
 // comparison is still a floating point comparison). This can happen when using
 // the wasm_simd128.h intrinsics because v128_t is an integer vector.
 foreach vec = [F32x4, F64x2, F16x8] in {
-defvar pmin = !cast<NI>("PMIN_"#vec);
-defvar pmax = !cast<NI>("PMAX_"#vec);
-def : Pat<(vec.int_vt (vselect
-            (setolt (vec.vt (bitconvert V128:$rhs)),
-                    (vec.vt (bitconvert V128:$lhs))),
-            V128:$rhs, V128:$lhs)),
-          (pmin $lhs, $rhs)>;
-def : Pat<(vec.int_vt (vselect
-            (setolt (vec.vt (bitconvert V128:$lhs)),
-                    (vec.vt (bitconvert V128:$rhs))),
-            V128:$rhs, V128:$lhs)),
-          (pmax $lhs, $rhs)>;
+  defvar pmin = !cast<NI>("PMIN_"#vec);
+  defvar pmax = !cast<NI>("PMAX_"#vec);
+  defm : PMinMaxInt<vec, pmin, pmax>;
 }
 
 // And match the pmin/pmax LLVM intrinsics as well
@@ -1756,6 +1759,15 @@ let Predicates = [HasRelaxedSIMD] in {
               (relaxed_max V128:$lhs, V128:$rhs)>;
     def : Pat<(vec.vt (fmaximumnum (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
               (relaxed_max V128:$lhs, V128:$rhs)>;
+
+    // Transform pmin/max-supposed patterns to relaxed min max
+    let AddedComplexity = 1 in {
+      def : Pat<(vec.vt (pmin (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
+                (relaxed_min $lhs, $rhs)>;
+      def : Pat<(vec.vt (pmax (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
+                (relaxed_max $lhs, $rhs)>;
+      defm : PMinMaxInt<vec, relaxed_min, relaxed_max>;
+    }
   }
 }
 
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmax.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmax.ll
index 45f4ddd783a55..f224a0dc21136 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmax.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmax.ll
@@ -54,6 +54,250 @@ define <2 x double> @test_minimumnum_f64x2(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %result
 }
 
+define <4 x float> @test_pmax_v4f32_olt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_olt:
+; CHECK:         .functype test_pmax_v4f32_olt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp olt <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+define <4 x float> @test_pmax_v4f32_ole(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_ole:
+; CHECK:         .functype test_pmax_v4f32_ole (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp ole <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+define <4 x float> @test_pmax_v4f32_ogt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_ogt:
+; CHECK:         .functype test_pmax_v4f32_ogt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp ogt <4 x float> %y, %x
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+define <4 x float> @test_pmax_v4f32_oge(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_oge:
+; CHECK:         .functype test_pmax_v4f32_oge (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp oge <4 x float> %y, %x
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+; For setlt
+define <4 x float> @pmax_v4f32_fast_olt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: pmax_v4f32_fast_olt:
+; CHECK:         .functype pmax_v4f32_fast_olt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast olt <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+; For setle
+define <4 x float> @test_pmax_v4f32_fast_ole(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_fast_ole:
+; CHECK:         .functype test_pmax_v4f32_fast_ole (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast ole <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+; For setgt
+define <4 x float> @test_pmax_v4f32_fast_ogt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_fast_ogt:
+; CHECK:         .functype test_pmax_v4f32_fast_ogt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast ogt <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+  ret <4 x float> %a
+}
+
+; For setge
+define <4 x float> @test_pmax_v4f32_fast_oge(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_fast_oge:
+; CHECK:         .functype test_pmax_v4f32_fast_oge (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast oge <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+  ret <4 x float> %a
+}
+
+define <4 x i32> @test_pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: test_pmax_int_v4f32:
+; CHECK:         .functype test_pmax_int_v4f32 (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    f32x4.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %fx = bitcast <4 x i32> %x to <4 x float>
+  %fy = bitcast <4 x i32> %y to <4 x float>
+  %c = fcmp olt <4 x float> %fy, %fx
+  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
+  ret <4 x i32> %a
+}
+
+define <2 x double> @test_pmax_v2f64_olt(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_olt:
+; CHECK:         .functype test_pmax_v2f64_olt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp olt <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+define <2 x double> @test_pmax_v2f64_ole(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_ole:
+; CHECK:         .functype test_pmax_v2f64_ole (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp ole <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+define <2 x double> @test_pmax_v2f64_ogt(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_ogt:
+; CHECK:         .functype test_pmax_v2f64_ogt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    f64x2.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp ogt <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
+  ret <2 x double> %a
+}
+define <2 x double> @test_pmax_v2f64_oge(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_oge:
+; CHECK:         .functype test_pmax_v2f64_oge (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    f64x2.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp oge <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
+  ret <2 x double> %a
+}
+
+; For setlt
+define <2 x double> @pmax_v2f64_fast_olt(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: pmax_v2f64_fast_olt:
+; CHECK:         .functype pmax_v2f64_fast_olt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast olt <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+; For setle
+define <2 x double> @test_pmax_v2f64_fast_ole(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_fast_ole:
+; CHECK:         .functype test_pmax_v2f64_fast_ole (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast ole <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+; For setgt
+define <2 x double> @test_pmax_v2f64_fast_ogt(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_fast_ogt:
+; CHECK:         .functype test_pmax_v2f64_fast_ogt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast ogt <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
+  ret <2 x double> %a
+}
+
+; For setge
+define <2 x double> @test_pmax_v2f64_fast_oge(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_fast_oge:
+; CHECK:         .functype test_pmax_v2f64_fast_oge (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast oge <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
+  ret <2 x double> %a
+}
+
+define <2 x i64> @test_pmax_int_v2f64(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: test_pmax_int_v2f64:
+; CHECK:         .functype test_pmax_int_v2f64 (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    f64x2.relaxed_max
+; CHECK-NEXT:    # fallthrough-return
+  %fx = bitcast <2 x i64> %x to <2 x double>
+  %fy = bitcast <2 x i64> %y to <2 x double>
+  %c = fcmp olt <2 x double> %fy, %fx
+  %a = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y
+  ret <2 x i64> %a
+}
+
 declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
 declare <4 x float> @llvm.maximumnum.v4f32(<4 x float>, <4 x float>)
 declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmin.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmin.ll
index f3eec023663a7..460446574ecfb 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmin.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmin.ll
@@ -53,6 +53,252 @@ define <2 x double> @test_minimumnum_f64x2(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %result
 }
 
+define <4 x float> @test_pmin_v4f32_olt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_olt:
+; CHECK:         .functype test_pmin_v4f32_olt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp olt <4 x float> %y, %x
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+define <4 x float> @test_pmin_v4f32_ole(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_ole:
+; CHECK:         .functype test_pmin_v4f32_ole (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp ole <4 x float> %y, %x
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+define <4 x float> @test_pmin_v4f32_ogt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_ogt:
+; CHECK:         .functype test_pmin_v4f32_ogt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp ogt <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+define <4 x float> @test_pmin_v4f32_oge(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_oge:
+; CHECK:         .functype test_pmin_v4f32_oge (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp oge <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+; For setlt
+define <4 x float> @pmin_v4f32_fast_olt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: pmin_v4f32_fast_olt:
+; CHECK:         .functype pmin_v4f32_fast_olt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    f32x4.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast olt <4 x float> %y, %x
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+; For setle
+define <4 x float> @test_pmin_v4f32_fast_ole(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_fast_ole:
+; CHECK:         .functype test_pmin_v4f32_fast_ole (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    f32x4.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast ole <4 x float> %y, %x
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+; For setgt
+define <4 x float> @test_pmin_v4f32_fast_ogt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_fast_ogt:
+; CHECK:         .functype test_pmin_v4f32_fast_ogt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast ogt <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+; For setge
+define <4 x float> @test_pmin_v4f32_fast_oge(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_fast_oge:
+; CHECK:         .functype test_pmin_v4f32_fast_oge (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast oge <4 x float> %x, %y
+  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+  ret <4 x float> %a
+}
+
+define <4 x i32> @test_pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: test_pmin_int_v4f32:
+; CHECK:         .functype test_pmin_int_v4f32 (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f32x4.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %fx = bitcast <4 x i32> %x to <4 x float>
+  %fy = bitcast <4 x i32> %y to <4 x float>
+  %c = fcmp olt <4 x float> %fy, %fx
+  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %x
+  ret <4 x i32> %a
+}
+
+define <2 x double> @test_pmin_v2f64_olt(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmin_v2f64_olt:
+; CHECK:         .functype test_pmin_v2f64_olt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp olt <2 x double> %y, %x
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+define <2 x double> @test_pmin_v2f64_ole(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmin_v2f64_ole:
+; CHECK:         .functype test_pmin_v2f64_ole (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp ole <2 x double> %y, %x
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+define <2 x double> @test_pmin_v2f64_ogt(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmin_v2f64_ogt:
+; CHECK:         .functype test_pmin_v2f64_ogt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp ogt <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+define <2 x double> @test_pmin_v2f64_oge(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmin_v2f64_oge:
+; CHECK:         .functype test_pmin_v2f64_oge (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp oge <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+; For setlt
+define <2 x double> @pmin_v2f64_fast_olt(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: pmin_v2f64_fast_olt:
+; CHECK:         .functype pmin_v2f64_fast_olt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    f64x2.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast olt <2 x double> %y, %x
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+; For setle
+define <2 x double> @test_pmin_v2f64_fast_ole(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmin_v2f64_fast_ole:
+; CHECK:         .functype test_pmin_v2f64_fast_ole (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    f64x2.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast ole <2 x double> %y, %x
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+; For setgt
+define <2 x double> @test_pmin_v2f64_fast_ogt(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmin_v2f64_fast_ogt:
+; CHECK:         .functype test_pmin_v2f64_fast_ogt (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast ogt <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+; For setge
+define <2 x double> @test_pmin_v2f64_fast_oge(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmin_v2f64_fast_oge:
+; CHECK:         .functype test_pmin_v2f64_fast_oge (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %c = fcmp fast oge <2 x double> %x, %y
+  %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+  ret <2 x double> %a
+}
+
+define <2 x i64> @test_pmin_int_v2f64(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: test_pmin_int_v2f64:
+; CHECK:         .functype test_pmin_int_v2f64 (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    f64x2.relaxed_min
+; CHECK-NEXT:    # fallthrough-return
+  %fx = bitcast <2 x i64> %x to <2 x double>
+  %fy = bitcast <2 x i64> %y to <2 x double>
+  %c = fcmp olt <2 x double> %fy, %fx
+  %a = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %x
+  ret <2 x i64> %a
+}
+
 declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
 declare <4 x float> @llvm.fminimumnum.v4f32(<4 x float>, <4 x float>)
 declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)