diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll new file mode 100644 index 0000000000000..cce0fb7e003c5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll @@ -0,0 +1,2719 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s +;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX1030 %s +;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX1100 %s + +define float @fmul_select_f32_test1(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test1: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test1: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; 
GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 2.000000e+00, float 1.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test2: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test2: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 5.000000e-01, float 1.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define <2 x float> @fmul_select_v2f32_test3(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f32_test3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX7-NEXT: 
v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f32_test3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f32_test3: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f32_test3: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 +; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00> + %ldexp = fmul <2 x float> %x, %y + ret <2 x float> %ldexp +} + +define <2 x float> @fmul_select_v2f32_test4(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f32_test4: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; 
GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f32_test4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f32_test4: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f32_test4: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 +; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x float> <float 5.000000e-01, float 5.000000e-01>, <2 x float> <float 1.000000e+00, float 1.000000e+00> + %ldexp = fmul <2 x float> %x, %y + ret <2 x float> %ldexp +} + +define float @fmul_select_f32_test5(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test5: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: 
v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test5: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test5: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float -2.000000e+00, float -1.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test6: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0xc0400000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xc0400000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc 
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test6: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xc0400000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test6: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc0400000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float -3.000000e+00, float 8.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test7_sel_log2val_pos59_pos92(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v3, 0x6d800000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0x5d000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x6d800000 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x5d000000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: +; GFX1030: 
; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x5d000000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x6d800000, v3, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x5d000000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x6d800000, v3, vcc_lo +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 0x43A0000000000000, float 0x45B0000000000000 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test8: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v3, 0xc1000000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0x41800000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xc1000000 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x41800000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test8: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x41800000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: 
v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test8: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x41800000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 1.600000e+01, float -8.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test9(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test9: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test9: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test9: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test9: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; 
GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 0.000000e+00, float 2.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test10(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test10: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test10: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test10: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test10: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float -0.000000e+00, float 0.000000e+00 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test11_sel_log2val_pos78_pos56(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: 
v_mov_b32_e32 v3, 0xdb800000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0xe6800000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xdb800000 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xe6800000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xe6800000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xdb800000, v3, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0xe6800000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xdb800000, v3, vcc_lo +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 0xC4D0000000000000, float 0xC370000000000000 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define float @fmul_select_f32_test12_sel_log2val_neg48_pos68(float %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v3, 0x61800000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0x27800000 +; GFX7-NEXT: 
v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x61800000 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x27800000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x27800000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x61800000, v3, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x27800000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x61800000, v3, vcc_lo +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, float 0x3CF0000000000000, float 0x4430000000000000 + %ldexp = fmul float %x, %y + ret float %ldexp +} + +define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0x3ff00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3ff00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test1: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test1: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 2.000000e+00, double 1.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test2(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0x3ff00000 +; GFX7-NEXT: v_mov_b32_e32 v5, 0x3fe00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3ff00000 +; GFX9-NEXT: v_mov_b32_e32 v5, 
0x3fe00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test2: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3fe00000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3ff00000, v5, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test2: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v5, 0x3fe00000 :: v_dual_mov_b32 v4, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0x3ff00000, v5, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 5.000000e-01, double 1.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define <2 x double> @fmul_select_v2f64_test3(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f64_test3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v11, 0x3ff00000 +; GFX7-NEXT: v_mov_b32_e32 v8, 0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX7-NEXT: v_cndmask_b32_e64 v10, v11, 2.0, vcc +; GFX7-NEXT: v_mov_b32_e32 v9, v8 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX7-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc +; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f64_test3: +; GFX9: ; %bb.0: +; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000 +; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v10, v11, 2.0, vcc +; GFX9-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc +; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f64_test3: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1030-NEXT: v_mov_b32_e32 v8, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v11, 0x3ff00000, 2.0, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1030-NEXT: v_mov_b32_e32 v10, v8 +; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f64_test3: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1100-NEXT: v_mov_b32_e32 v8, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v11, 0x3ff00000, 2.0, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_mov_b32_e32 v10, v8 +; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00> + %ldexp = fmul <2 x double> %x, %y + ret <2 x double> %ldexp +} + +define <2 x double> @fmul_select_v2f64_test4(<2 x double> %x, <2 x i32> %bool.arg1, 
<2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f64_test4: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v11, 0x3ff00000 +; GFX7-NEXT: v_mov_b32_e32 v12, 0x3fe00000 +; GFX7-NEXT: v_mov_b32_e32 v8, 0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX7-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc +; GFX7-NEXT: v_mov_b32_e32 v9, v8 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX7-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc +; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f64_test4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000 +; GFX9-NEXT: v_mov_b32_e32 v12, 0x3fe00000 +; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc +; GFX9-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc +; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f64_test4: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v9, 0x3fe00000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1030-NEXT: v_mov_b32_e32 v8, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v11, 0x3ff00000, v9, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1030-NEXT: v_mov_b32_e32 v10, v8 +; GFX1030-NEXT: v_cndmask_b32_e32 v9, 0x3ff00000, v9, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f64_test4: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v9, 0x3fe00000 :: v_dual_mov_b32 
v8, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_dual_mov_b32 v10, v8 :: v_dual_cndmask_b32 v11, 0x3ff00000, v9 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1100-NEXT: v_cndmask_b32_e32 v9, 0x3ff00000, v9, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x double> <double 5.000000e-01, double 5.000000e-01>, <2 x double> <double 1.000000e+00, double 1.000000e+00> + %ldexp = fmul <2 x double> %x, %y + ret <2 x double> %ldexp +} + +define double @fmul_select_f64_test5(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test5: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX7-NEXT: v_mov_b32_e32 v5, 0xbfe00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX9-NEXT: v_mov_b32_e32 v5, 0xbfe00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test5: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0xbfe00000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] 
+; +; GFX1100-LABEL: fmul_select_f64_test5: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v5, 0xbfe00000 :: v_dual_mov_b32 v4, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double -5.000000e-01, double -1.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test6: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test6: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, -2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test6: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; 
GFX1100-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, -2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double -2.000000e+00, double -1.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test7: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test7: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test7: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 
%bool.arg1, %bool.arg2 + %y = select i1 %bool, double 2.000000e+00, double -1.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test8: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0xc0400000 +; GFX7-NEXT: v_mov_b32_e32 v5, 0xc0100000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0xc0400000 +; GFX9-NEXT: v_mov_b32_e32 v5, 0xc0100000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test8: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0xc0100000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0xc0400000, v5, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test8: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v5, 0xc0100000 :: v_dual_mov_b32 v4, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0xc0400000, v5, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 
-4.000000e+00, double -3.200000e+01 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define <2 x double> @fmul_select_v2f64_test9(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f64_test9: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v11, 0xbff00000 +; GFX7-NEXT: v_mov_b32_e32 v8, 0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX7-NEXT: v_cndmask_b32_e64 v10, v11, -2.0, vcc +; GFX7-NEXT: v_mov_b32_e32 v9, v8 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX7-NEXT: v_cndmask_b32_e64 v9, v11, -2.0, vcc +; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f64_test9: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v11, 0xbff00000 +; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v10, v11, -2.0, vcc +; GFX9-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, -2.0, vcc +; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f64_test9: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1030-NEXT: v_mov_b32_e32 v8, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v11, 0xbff00000, -2.0, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1030-NEXT: v_mov_b32_e32 v10, v8 +; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0xbff00000, -2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f64_test9: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1100-NEXT: v_mov_b32_e32 v8, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v11, 0xbff00000, -2.0, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_mov_b32_e32 v10, v8 +; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0xbff00000, -2.0, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x double> <double -2.000000e+00, double -2.000000e+00>, <2 x double> <double -1.000000e+00, double -1.000000e+00> + %ldexp = fmul <2 x double> %x, %y + ret <2 x double> %ldexp +} + +define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f64_test10: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v8, 0 +; GFX7-NEXT: v_mov_b32_e32 v9, 0xbff00000 +; GFX7-NEXT: v_mov_b32_e32 v10, 0x3fe00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX7-NEXT: v_mov_b32_e32 v11, 0x3ff00000 +; GFX7-NEXT: v_cndmask_b32_e32 v10, v9, v10, vcc +; GFX7-NEXT: v_mov_b32_e32 v9, v8 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX7-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc +; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f64_test10: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_mov_b32_e32 v9, 0xbff00000 +; GFX9-NEXT: v_mov_b32_e32 v10, 0x3fe00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000 +; GFX9-NEXT: v_cndmask_b32_e32 v10, v9, v10, vcc +; GFX9-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] +; GFX9-NEXT:
v_cndmask_b32_e64 v9, v11, 2.0, vcc +; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f64_test10: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v9, 0x3fe00000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1030-NEXT: v_mov_b32_e32 v8, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v11, 0xbff00000, v9, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1030-NEXT: v_mov_b32_e32 v10, v8 +; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f64_test10: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v9, 0x3fe00000 :: v_dual_mov_b32 v8, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_dual_mov_b32 v10, v8 :: v_dual_cndmask_b32 v11, 0xbff00000, v9 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] +; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x double> <double 5.000000e-01, double 2.000000e+00>, <2 x double> <double -1.000000e+00, double 1.000000e+00> + %ldexp = fmul <2 x double> %x, %y + ret <2 x double> %ldexp +} + +define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test11: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_bfrev_b32_e32 v4, 1 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT:
v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test11: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_bfrev_b32_e32 v4, 1 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test11: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test11: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double -2.000000e+00, double -0.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test12: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 31, v2 +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test12: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e64 
v2, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 31, v2 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test12: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v2, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 31, v3 +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test12: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 31, v3 +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 0.000000e+00, double -0.000000e+00 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test13: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v5, 0x40300000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_mov_b32_e32 v4, 0 +; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test13: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v5, 0x40300000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX9-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test13: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test13: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: v_mov_b32_e32 v4, 0 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 0.000000e+00, double 1.600000e+01 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test14_sel_log2val_pos92_neg27(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0x3e400000 +; GFX7-NEXT: v_mov_b32_e32 v5, 0x45b00000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3e400000 +; GFX9-NEXT: v_mov_b32_e32 v5, 0x45b00000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX1030: 
; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x45b00000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3e400000, v5, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v5, 0x45b00000 :: v_dual_mov_b32 v4, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0x3e400000, v5, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 0x45B0000000000000, double 0x3E40000000000000 + %ldexp = fmul double %x, %y + ret double %ldexp +} + +define double @fmul_select_f64_test15_sel_log2val_neg42_neg33(double %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v4, 0x3de00000 +; GFX7-NEXT: v_mov_b32_e32 v5, 0x3d500000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX7-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3de00000 +; GFX9-NEXT: v_mov_b32_e32 v5, 0x3d500000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; 
+; GFX1030-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3d500000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0 +; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3de00000, v5, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v5, 0x3d500000 :: v_dual_mov_b32 v4, 0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0x3de00000, v5, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, double 0x3D50000000000000, double 0x3DE0000000000000 + %ldexp = fmul double %x, %y + ret double %ldexp +} + + +define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test1: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test1: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half 2.000000e+00, half 1.000000e+00 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3800 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test2: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3800 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; 
+; GFX1100-LABEL: fmul_select_f16_test2: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x3800 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half 5.000000e-01, half 1.000000e+00 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define <2 x half> @fmul_select_v2f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f16_test3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f16_test3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00 +; GFX9-NEXT: v_mov_b32_e32 v6, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f16_test3: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1030-NEXT: 
v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo +; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f16_test3: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo +; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x half> <half 2.000000e+00, half 2.000000e+00>, <2 x half> <half 1.000000e+00, half 1.000000e+00> + %ldexp = fmul <2 x half> %x, %y + ret <2 x half> %ldexp +} + +define <2 x half> @fmul_select_v2f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2f16_test4: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2f16_test4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00 +; GFX9-NEXT: v_mov_b32_e32 v6, 0x3800 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +;
GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2f16_test4: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3800 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo +; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2f16_test4: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3800 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo +; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x half> <half 5.000000e-01, half 5.000000e-01>, <2 x half> <half 1.000000e+00, half 1.000000e+00> + %ldexp = fmul <2 x half> %x, %y + ret <2 x half> %ldexp +} + +define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test5: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0,
v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x4800 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test5: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test5: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half 2.000000e+00, half 8.000000e+00 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test6: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x40400000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 
v3, 0x4200 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xc800 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test6: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xc800 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test6: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc800 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half -8.000000e+00, half 3.000000e+00 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test7: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xc400 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x4800 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; 
+; GFX1030-LABEL: fmul_select_f16_test7: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4800 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test7: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4800 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half 8.000000e+00, half -4.000000e+00 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test8: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x8000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test8: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 
+; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test8: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half -0.000000e+00, half 0.000000e+00 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test9: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0xc2000000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1800000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test9: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xd000 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xcc00 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test9: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xcc00 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test9: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0xcc00 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; 
GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half -1.600000e+01, half -3.200000e+01 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test10_sel_log2val_neg11_pos11(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x45000000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0x3a000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x6800 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x1000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x1000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x6800, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x1000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x6800, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half 0xH1000, half 0xH6800 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define half @fmul_select_f16_test11_sel_log2val_pos7_neg14(half %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x38800000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0x43000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x400 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x5800 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x5800 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x400, v3, vcc_lo +; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, 0x5800 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: 
v_cndmask_b32_e32 v1, 0x400, v3, vcc_lo +; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, half 0xH5800, half 0xH0400 + %ldexp = fmul half %x, %y + ret half %ldexp +} + +define bfloat @fmul_select_bf16_test1(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x3f80 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test1: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; 
GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test1: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat 2.000000e+00, bfloat 1.000000e+00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test2(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test2: +; GFX9: ; %bb.0: +; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x3f80 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3f00 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test2: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3f00 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test2: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0x3f00 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 
+; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat 5.000000e-01, bfloat 1.000000e+00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define <2 x bfloat> @fmul_select_v2bf16_test3(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2bf16_test3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2bf16_test3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v5, 0x3f80 +; GFX9-NEXT: v_mov_b32_e32 v6, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_bfe_u32 v3, v1, 16, 
1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_add3_u32 v3, v3, v1, s4 +; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX9-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_add3_u32 v2, v2, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v3, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x7060302 +; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2bf16_test3: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX1030-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX1030-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1030-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX1030-NEXT: v_bfe_u32 v3, v0, 16, 1 +; GFX1030-NEXT: v_add3_u32 v2, v2, v1, 0x7fff +; GFX1030-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX1030-NEXT: v_add3_u32 v3, v3, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo +; GFX1030-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2bf16_test3: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, 
v1, v3 +; GFX1100-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX1100-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1 +; GFX1100-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX1100-NEXT: v_bfe_u32 v3, v0, 16, 1 +; GFX1100-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1100-NEXT: v_add3_u32 v3, v3, v0, 0x7fff +; GFX1100-NEXT: v_add3_u32 v2, v2, v1, 0x7fff +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x bfloat> <bfloat 2.000000e+00, bfloat 2.000000e+00>, <2 x bfloat> <bfloat 1.000000e+00, bfloat 1.000000e+00> + %ldexp = fmul <2 x bfloat> %x, %y + ret <2 x bfloat> %ldexp +} + +define <2 x bfloat> @fmul_select_v2bf16_test4(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { +; GFX7-LABEL: fmul_select_v2bf16_test4: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mul_f32_e32
v1, 1.0, v1 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_v2bf16_test4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v5, 0x3f80 +; GFX9-NEXT: v_mov_b32_e32 v6, 0x3f00 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_bfe_u32 v3, v1, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_add3_u32 v3, v3, v1, s4 +; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX9-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_add3_u32 v2, v2, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v3, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x7060302 +; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_v2bf16_test4: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3f00 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX1030-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX1030-NEXT: 
v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo +; GFX1030-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX1030-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1030-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX1030-NEXT: v_bfe_u32 v3, v0, 16, 1 +; GFX1030-NEXT: v_add3_u32 v2, v2, v1, 0x7fff +; GFX1030-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX1030-NEXT: v_add3_u32 v3, v3, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo +; GFX1030-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_v2bf16_test4: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3f00 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX1100-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX1100-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1 +; GFX1100-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX1100-NEXT: v_bfe_u32 v3, v0, 16, 1 +; GFX1100-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX1100-NEXT: 
v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1100-NEXT: v_add3_u32 v3, v3, v0, 0x7fff +; GFX1100-NEXT: v_add3_u32 v2, v2, v1, 0x7fff +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 + %y = select <2 x i1> %bool, <2 x bfloat> <bfloat 5.000000e-01, bfloat 5.000000e-01>, <2 x bfloat> <bfloat 1.000000e+00, bfloat 1.000000e+00> + %ldexp = fmul <2 x bfloat> %x, %y + ret <2 x bfloat> %ldexp +} + +define bfloat @fmul_select_bf16_test5(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test5: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x4100 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32
v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test5: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test5: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat 2.000000e+00, bfloat 8.000000e+00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat 
@fmul_select_bf16_test6(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test6: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x40400000 +; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x4040 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffc100 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test6: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffc100 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; 
GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test6: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffc100 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat -8.000000e+00, bfloat 3.000000e+00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test7(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test7: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffc080 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x4100 +; GFX9-NEXT: v_cmp_eq_u32_e32 
vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test7: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4100 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test7: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4100 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu 
instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat 8.000000e+00, bfloat -4.000000e+00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test8: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 31, v1 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-NEXT: v_mov_b32_e32 v2, 15 +; GFX9-NEXT: v_lshlrev_b16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test8: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: 
v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1030-NEXT: v_lshlrev_b16 v1, 15, v1 +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test8: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_lshlrev_b16 v1, 15, v1 +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat -0.000000e+00, bfloat 0.000000e+00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test9(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test9: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0xc2000000 +; GFX7-NEXT: v_mov_b32_e32 v4, 
0xc1800000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test9: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffc200 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffc180 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test9: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffc180 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test9: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffc180 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat -1.600000e+01, bfloat -3.200000e+01 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test10_sel_log2val_pos65_pos56(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_mov_b32_e32 v3, 0xdb800000 +; GFX7-NEXT: v_bfrev_b32_e32 v4, 7 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffdb80 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffe000 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: 
v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffe000 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffe000 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; 
GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat 0xRE000, bfloat 0xRDB80 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} + +define bfloat @fmul_select_bf16_test11_sel_log2val_neg22_pos25(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { +; GFX7-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: v_bfrev_b32_e32 v3, 50 +; GFX7-NEXT: v_mov_b32_e32 v4, 0x34800000 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, 0x4c00 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x3480 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX1030: ; %bb.0: +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3480 +; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 
16, v0 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo +; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1030-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, 0x3480 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %bool = icmp eq i32 %bool.arg1, %bool.arg2 + %y = select i1 %bool, bfloat 0xR3480, bfloat 0xR4C00 + %ldexp = fmul bfloat %x, %y + ret bfloat %ldexp +} +