diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir index fb7c2d4d705e7..95d2bae98df2e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir @@ -274,24 +274,18 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ASHRREV_I16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: ashr_s16_vv_zext_to_s64 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ASHRREV_I16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: ashr_s16_vv_zext_to_s64 ; GFX10: liveins: $vgpr0, $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir index 779312596313a..3a2ed71e4d224 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir @@ -79,9 +79,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY1]], 0, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CTPOP %0 @@ -104,9 +103,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY1]], [[V_BCNT_U32_B32_e64_]], 0, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CTPOP %0 @@ -155,9 +153,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY1]], 0, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s32) = G_CTPOP %0 @@ -181,9 +178,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], 0, implicit $exec - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY]], 0, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s32) = G_CTPOP %1 @@ -207,9 +203,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def dead $scc - ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[S_BCNT1_I32_B32_]], [[COPY1]], 0, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] + ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:sgpr(s32) = G_CTPOP %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir index e7ec5fcbba247..a96b574a64784 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir @@ -272,24 +272,18 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHRREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: lshr_s16_vv_zext_to_s64 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHRREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: lshr_s16_vv_zext_to_s64 ; GFX10: liveins: $vgpr0, $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir index bcb6d75c18302..b0703a642e033 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir @@ -272,24 +272,18 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX9-LABEL: name: shl_s16_vv_zext_to_s64 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec - ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] ; GFX10-LABEL: name: shl_s16_vv_zext_to_s64 ; GFX10: liveins: $vgpr0, $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll index 6bb4e2d3dbe26..ed85fb19d9051 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll @@ -204,18 +204,37 @@ define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b) } define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) { -; GCN-LABEL: vector_xnor_i32_one_use: -; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_xor_b32_e32 v0, v0, v1 -; GCN-NEXT: v_not_b32_e32 v0, v0 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX7-LABEL: vector_xnor_i32_one_use: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_not_b32_e32 v0, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: vector_xnor_i32_one_use: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_not_b32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: vector_xnor_i32_one_use: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1 +; GFX900-NEXT: v_not_b32_e32 v0, v0 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX906-LABEL: vector_xnor_i32_one_use: +; GFX906: ; %bb.0: ; %entry +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1 +; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: vector_xnor_i32_one_use: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1 -; GFX10-NEXT: v_not_b32_e32 v0, v0 +; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: %xor = xor i32 %a, %b @@ -224,22 +243,45 @@ entry: } define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) { -; GCN-LABEL: vector_xnor_i64_one_use: -; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_xor_b32_e32 v0, v0, v2 -; GCN-NEXT: v_xor_b32_e32 v1, v1, v3 -; GCN-NEXT: v_not_b32_e32 v0, v0 -; GCN-NEXT: v_not_b32_e32 v1, v1 -; GCN-NEXT: s_setpc_b64 s[30:31] +; GFX7-LABEL: vector_xnor_i64_one_use: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_xor_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_xor_b32_e32 v1, v1, v3 +; GFX7-NEXT: v_not_b32_e32 v0, v0 +; GFX7-NEXT: v_not_b32_e32 v1, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: vector_xnor_i64_one_use: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_xor_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_xor_b32_e32 v1, v1, v3 +; GFX8-NEXT: v_not_b32_e32 v0, v0 +; GFX8-NEXT: v_not_b32_e32 v1, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-LABEL: vector_xnor_i64_one_use: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_xor_b32_e32 v0, v0, v2 +; GFX900-NEXT: v_xor_b32_e32 v1, v1, v3 +; GFX900-NEXT: v_not_b32_e32 v0, v0 +; GFX900-NEXT: v_not_b32_e32 v1, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX906-LABEL: vector_xnor_i64_one_use: +; GFX906: ; %bb.0: ; %entry +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v2 +; GFX906-NEXT: v_xnor_b32_e32 v1, v1, v3 +; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: vector_xnor_i64_one_use: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2 -; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3 -; GFX10-NEXT: v_not_b32_e32 v0, v0 -; GFX10-NEXT: v_not_b32_e32 v1, v1 +; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v2 +; GFX10-NEXT: v_xnor_b32_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: %xor = xor i64 %a, %b @@ -248,16 +290,32 @@ entry: } define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) { -; GCN-LABEL: xnor_s_v_i32_one_use: -; GCN: ; %bb.0: -; GCN-NEXT: v_xor_b32_e32 v0, s0, v0 -; GCN-NEXT: v_not_b32_e32 v0, v0 -; GCN-NEXT: ; return to shader part epilog +; GFX7-LABEL: xnor_s_v_i32_one_use: +; GFX7: ; %bb.0: +; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_not_b32_e32 v0, v0 +; GFX7-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: xnor_s_v_i32_one_use: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 +; GFX8-NEXT: v_not_b32_e32 v0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX900-LABEL: xnor_s_v_i32_one_use: +; GFX900: ; %bb.0: +; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0 +; GFX900-NEXT: v_not_b32_e32 v0, v0 +; GFX900-NEXT: ; return to shader part epilog +; +; GFX906-LABEL: xnor_s_v_i32_one_use: +; GFX906: ; %bb.0: +; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0 +; GFX906-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: xnor_s_v_i32_one_use: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX10-NEXT: v_not_b32_e32 v0, v0 +; GFX10-NEXT: v_xnor_b32_e32 v0, s0, v0 ; GFX10-NEXT: ; return to shader part epilog %xor = xor i32 %s, %v %d = xor i32 %xor, -1 @@ -266,16 +324,32 @@ define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) { } define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) { -; GCN-LABEL: xnor_v_s_i32_one_use: -; GCN: ; %bb.0: -; GCN-NEXT: v_xor_b32_e32 v0, s0, v0 -; GCN-NEXT: v_not_b32_e32 v0, v0 -; GCN-NEXT: ; return to shader part epilog +; GFX7-LABEL: xnor_v_s_i32_one_use: +; GFX7: ; %bb.0: +; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_not_b32_e32 v0, v0 +; GFX7-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: xnor_v_s_i32_one_use: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 +; GFX8-NEXT: v_not_b32_e32 v0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX900-LABEL: xnor_v_s_i32_one_use: +; GFX900: ; %bb.0: +; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0 +; GFX900-NEXT: v_not_b32_e32 v0, v0 +; GFX900-NEXT: ; return to shader part epilog +; +; GFX906-LABEL: xnor_v_s_i32_one_use: +; GFX906: ; %bb.0: +; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0 +; GFX906-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: xnor_v_s_i32_one_use: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX10-NEXT: v_not_b32_e32 v0, v0 +; GFX10-NEXT: v_xnor_b32_e64 v0, v0, s0 ; GFX10-NEXT: ; return to shader part epilog %xor = xor i32 %v, %s %d = xor i32 %xor, -1 @@ -314,19 +388,15 @@ define amdgpu_ps <2 x float> @xnor_i64_s_v_one_use(i64 inreg %a, i64 %b64) { ; GFX906-LABEL: xnor_i64_s_v_one_use: ; GFX906: ; %bb.0: ; %entry ; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1] -; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX906-NEXT: v_not_b32_e32 v0, v0 -; GFX906-NEXT: v_not_b32_e32 v1, v1 +; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0 +; GFX906-NEXT: v_xnor_b32_e32 v1, s1, v1 ; GFX906-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: xnor_i64_s_v_one_use: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1] -; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX10-NEXT: v_not_b32_e32 v0, v0 -; GFX10-NEXT: v_not_b32_e32 v1, v1 +; GFX10-NEXT: v_xnor_b32_e32 v0, s0, v0 +; GFX10-NEXT: v_xnor_b32_e32 v1, s1, v1 ; GFX10-NEXT: ; return to shader part epilog entry: %b = shl i64 %b64, 29 @@ -367,19 +437,15 @@ define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) { ; GFX906-LABEL: xnor_i64_v_s_one_use: ; GFX906: ; %bb.0: ; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1] -; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX906-NEXT: v_not_b32_e32 v0, v0 -; GFX906-NEXT: v_not_b32_e32 v1, v1 +; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0 +; GFX906-NEXT: v_xnor_b32_e64 v1, v1, s1 ; GFX906-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: xnor_i64_v_s_one_use: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1] -; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0 -; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1 -; GFX10-NEXT: v_not_b32_e32 v0, v0 -; GFX10-NEXT: v_not_b32_e32 v1, v1 +; GFX10-NEXT: v_xnor_b32_e64 v0, v0, s0 +; GFX10-NEXT: v_xnor_b32_e64 v1, v1, s1 ; GFX10-NEXT: ; return to shader part epilog %b = shl i64 %b64, 29 %xor = xor i64 %b, %a @@ -419,7 +485,7 @@ define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) { ; GFX10-LABEL: vector_xor_na_b_i32_one_use: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_xor3_b32 v0, v0, -1, v1 +; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: %na = xor i32 %a, -1 @@ -458,7 +524,7 @@ define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) { ; GFX10-LABEL: vector_xor_a_nb_i32_one_use: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_xor3_b32 v0, v1, -1, v0 +; GFX10-NEXT: v_xnor_b32_e32 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: %nb = xor i32 %b, -1 diff --git a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll index 1b35a89ad7f93..4011c21af6904 100644 --- a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll +++ b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll @@ -139,10 +139,6 @@ define <4 x i32> @csh_v4i32(<4 x i32> %a, <4 x i32> %b) { ; GISEL-LABEL: csh_v4i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v4, 31, v4 -; GISEL-NEXT: v_and_b32_e32 v5, 31, v5 -; GISEL-NEXT: v_and_b32_e32 v6, 31, v6 -; GISEL-NEXT: v_and_b32_e32 v7, 31, v7 ; GISEL-NEXT: v_lshlrev_b32_e32 v8, v4, v0 ; GISEL-NEXT: v_lshlrev_b32_e32 v9, v5, v1 ; GISEL-NEXT: v_lshlrev_b32_e32 v10, v6, v2 diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll index 9f093cc7b5abf..26a4ea9d8a4b6 100644 --- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll +++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll @@ -230,49 +230,27 @@ entry: } define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) { -; GFX67-SDAG-LABEL: clpeak_imad_pat_i16: -; GFX67-SDAG: ; %bb.0: ; %entry -; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v0, v2 -; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX67-GISEL-LABEL: clpeak_imad_pat_i16: -; GFX67-GISEL: ; %bb.0: ; %entry -; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX67-LABEL: clpeak_imad_pat_i16: +; GFX67: ; %bb.0: ; %entry +; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v1 +; GFX67-NEXT: v_mul_u32_u24_e32 v3, v0, v2 +; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3 +; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2 +; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 +; GFX67-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX67-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_imad_pat_i16: ; GFX8-SDAG: ; %bb.0: ; %entry @@ -337,11 +315,11 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) { ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -363,13 +341,13 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) { ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 @@ -400,13 +378,13 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) { ; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 @@ -470,42 +448,40 @@ define <2 x i16> @clpeak_imad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) { ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v5, v3, 1 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v4, v2, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2 +; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 +; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16: @@ -682,46 +658,43 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) { ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v6, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v7, v4 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v6, v3, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v5 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v7, v4, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v8, v5, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v0, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v6, v6, v3, 1 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v1, v4 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v7 -; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v7, v7, v4, 1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v3, 1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v4, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v9 +; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v6 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v2, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v5, 1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v4 +; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v10 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v7 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v11 +; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v8 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5 -; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v8 -; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 -; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v6 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v3, v0 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v8 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v3, v2 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_imad_pat_v3i16: @@ -1063,19 +1036,15 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) { ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 ; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v10, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v10, v5, v1 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v11, 16, v9 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v4 ; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9 ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v9, v6 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v11, v7 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v8, v4, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v11, v3 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v9, v6, v2 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v11, v7, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 @@ -1085,60 +1054,60 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) { ; GFX67-GISEL-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5 -; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v10 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v13, v2, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v10, v10, v5, 1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v12, v0, v4 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v4, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v8 -; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7 -; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v9 -; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11 -; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 -; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v6 -; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v7 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v2 -; GFX67-GISEL-NEXT: v_or_b32_e32 v5, v5, v6 -; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v3 -; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7 -; GFX67-GISEL-NEXT: v_or_b32_e32 v6, v6, v7 -; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v8 -; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v9 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX67-GISEL-NEXT: v_or_b32_e32 v7, v7, v8 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v15, v3, v7 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v11, v11, v7, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v3, v7, 1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v14, v1, v6 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v9, v9, v6, 1 +; GFX67-GISEL-NEXT: v_or_b32_e32 v8, v8, v10 +; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v11 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v6, 1 +; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3 +; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v12 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v8 +; GFX67-GISEL-NEXT: v_or_b32_e32 v9, v9, v10 +; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v1, v2 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v8 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v13 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v5, v1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v14 +; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v9 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v9 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6 +; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v15 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v6, v3 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v4, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v5 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v9, 16, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v8 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v1, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v1, v9 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v6 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v7 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v2, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v7 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v6 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v5 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v4, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_imad_pat_v4i16: @@ -1403,47 +1372,26 @@ entry: } define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) { -; GFX67-SDAG-LABEL: clpeak_umad_pat_i16: -; GFX67-SDAG: ; %bb.0: ; %entry -; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v0, v1 -; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v3, v2 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v3, v2, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX67-GISEL-LABEL: clpeak_umad_pat_i16: -; GFX67-GISEL: ; %bb.0: ; %entry -; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX67-LABEL: clpeak_umad_pat_i16: +; GFX67: ; %bb.0: ; %entry +; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_mul_u32_u24_e32 v2, v0, v1 +; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v3, v2 +; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 +; GFX67-NEXT: v_mad_u32_u24 v1, v3, v2, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_umad_pat_i16: ; GFX8-SDAG: ; %bb.0: ; %entry @@ -1504,11 +1452,11 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) { ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -1530,13 +1478,13 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) { ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -1567,13 +1515,13 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) { ; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -1637,42 +1585,40 @@ define <2 x i16> @clpeak_umad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) { ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v5, v3, 1 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v4, v2, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2 +; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 +; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16: @@ -1849,46 +1795,43 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) { ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v6, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v7, v4 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v6, v3, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v5 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v7, v4, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v8, v5, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v0, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v6, v6, v3, 1 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v1, v4 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v7 -; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v7, v7, v4, 1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v3, 1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v4, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v9 +; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v6 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v2, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v5, 1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v4 +; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v10 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v7 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v11 +; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v8 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5 -; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v8 -; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 -; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v6 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v3, v0 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v8 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v3, v2 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_umad_pat_v3i16: @@ -2230,19 +2173,15 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) { ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 ; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v10, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v10, v5, v1 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v11, 16, v9 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v4 ; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9 ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v9, v6 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v11, v7 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v8, v4, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v11, v3 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v9, v6, v2 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v11, v7, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 @@ -2252,60 +2191,60 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) { ; GFX67-GISEL-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5 -; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v10 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v13, v2, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v10, v10, v5, 1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v12, v0, v4 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v4, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v10 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v8 -; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7 -; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v9 -; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11 -; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 -; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v6 -; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v7 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v2 -; GFX67-GISEL-NEXT: v_or_b32_e32 v5, v5, v6 -; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v3 -; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7 -; GFX67-GISEL-NEXT: v_or_b32_e32 v6, v6, v7 -; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v8 -; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v9 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX67-GISEL-NEXT: v_or_b32_e32 v7, v7, v8 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v15, v3, v7 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v11, v11, v7, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v3, v7, 1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v14, v1, v6 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v9, v9, v6, 1 +; GFX67-GISEL-NEXT: v_or_b32_e32 v8, v8, v10 +; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v11 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v6, 1 +; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3 +; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v12 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v8 +; GFX67-GISEL-NEXT: v_or_b32_e32 v9, v9, v10 +; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v1, v2 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v8 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v13 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v5, v1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v14 +; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v9 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v9 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6 +; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v15 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v6, v3 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v4, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v5 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v9, 16, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v8 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v1, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v1, v9 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v6 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v7 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v2, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v7 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v6 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v5 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v4, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_umad_pat_v4i16: @@ -4282,49 +4221,27 @@ entry: } define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) { -; GFX67-SDAG-LABEL: clpeak_imad_pat_i8: -; GFX67-SDAG: ; %bb.0: ; %entry -; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v1 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v0, v2 -; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX67-GISEL-LABEL: clpeak_imad_pat_i8: -; GFX67-GISEL: ; %bb.0: ; %entry -; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX67-LABEL: clpeak_imad_pat_i8: +; GFX67: ; %bb.0: ; %entry +; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0 +; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v1 +; GFX67-NEXT: v_mul_u32_u24_e32 v3, v0, v2 +; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1 +; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX67-NEXT: v_and_b32_e32 v3, 0xff, v3 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3 +; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2 +; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 +; GFX67-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX67-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_imad_pat_i8: ; GFX8-SDAG: ; %bb.0: ; %entry @@ -4389,11 +4306,11 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) { ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4415,13 +4332,13 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) { ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8 @@ -4452,13 +4369,13 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) { ; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8 @@ -4524,32 +4441,30 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) { ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 -; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v4, v2, 1 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v5, v3, 1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v6 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v4 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v7 +; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v5 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v4 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 +; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v2, v1 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i8: @@ -4655,20 +4570,18 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) { ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v4, v0, v2 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v5, v1, v3 -; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v4, v0 -; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v5, v1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3 -; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v4, 1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v3, v5, 1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v4, v0, 1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v5, v1, 1 +; GFX10-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0 +; GFX10-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1 +; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1 +; GFX10-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3 +; GFX10-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1 +; GFX10-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v4 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v5 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i8: @@ -4704,25 +4617,21 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) { ; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v4, v0, v2 -; GFX11-GISEL-NEXT: v_mul_lo_u16 v5, v1, v3 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v4, v0 -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v5, v1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2 -; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3 -; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v4, 1 -; GFX11-GISEL-NEXT: v_add_nc_u16 v3, v5, 1 +; GFX11-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0 +; GFX11-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1 +; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1 +; GFX11-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-GISEL-NEXT: v_add_nc_u16 v4, v0, 1 -; GFX11-GISEL-NEXT: v_add_nc_u16 v5, v1, 1 +; GFX11-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2 +; GFX11-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3 +; GFX11-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1 +; GFX11-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0 +; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2 ; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v4 -; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v5 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i8: @@ -4766,25 +4675,21 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) { ; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v4, v0, v2 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v5, v1, v3 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v4, v0 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v5, v1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v4, 1 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v3, v5, 1 +; GFX1200-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0 +; GFX1200-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1 +; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1 +; GFX1200-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1200-GISEL-NEXT: v_add_nc_u16 v4, v0, 1 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v5, v1, 1 +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2 +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3 +; GFX1200-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1 +; GFX1200-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0 +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1 +; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2 ; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v4 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v5 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] entry: %y18 = add <2 x i8> %x, @@ -7600,81 +7505,43 @@ entry: } define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) { -; GFX67-SDAG-LABEL: clpeak_imad_pat_i16_x2: -; GFX67-SDAG: ; %bb.0: ; %entry -; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v0, v2 -; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v2 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v2 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX67-GISEL-LABEL: clpeak_imad_pat_i16_x2: -; GFX67-GISEL: ; %bb.0: ; %entry -; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX67-LABEL: clpeak_imad_pat_i16_x2: +; GFX67: ; %bb.0: ; %entry +; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v1 +; GFX67-NEXT: v_mul_u32_u24_e32 v3, v0, v2 +; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3 +; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v2 +; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3 +; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v2 +; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3 +; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2 +; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 +; GFX67-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX67-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_imad_pat_i16_x2: ; GFX8-SDAG: ; %bb.0: ; %entry @@ -7767,19 +7634,19 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) { ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0 -; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0 -; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7807,23 +7674,23 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) { ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0 -; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0 -; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 @@ -7860,23 +7727,23 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) { ; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 @@ -7902,79 +7769,42 @@ entry: } define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) { -; GFX67-SDAG-LABEL: clpeak_umad_pat_i16_x2: -; GFX67-SDAG: ; %bb.0: ; %entry -; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v0, v1 -; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v3, v2 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v1 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v3, v2, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v3 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v1 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v3, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v3 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v2, v3, 1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX67-GISEL-LABEL: clpeak_umad_pat_i16_x2: -; GFX67-GISEL: ; %bb.0: ; %entry -; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX67-LABEL: clpeak_umad_pat_i16_x2: +; GFX67: ; %bb.0: ; %entry +; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_mul_u32_u24_e32 v2, v0, v1 +; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v3, v2 +; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v1 +; GFX67-NEXT: v_mad_u32_u24 v2, v3, v2, 1 +; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v2, v3 +; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v1 +; GFX67-NEXT: v_mad_u32_u24 v2, v2, v3, 1 +; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v2, v3 +; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4 +; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 +; GFX67-NEXT: v_mad_u32_u24 v1, v2, v3, 1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_umad_pat_i16_x2: ; GFX8-SDAG: ; %bb.0: ; %entry @@ -8063,19 +7893,19 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) { ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0 -; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0 -; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -8103,23 +7933,23 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) { ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0 -; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0 -; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -8156,23 +7986,23 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) { ; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 ; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 ; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -8268,10 +8098,8 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) { ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 @@ -8279,9 +8107,9 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) { ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v5, v3, 1 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v4, v2, 1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 @@ -8290,64 +8118,60 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) { ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v1, v5, v3 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v0, v4, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3 -; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v5, 1 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v3, v5, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v2, v4, v0 +; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v3, v5, 1 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v2, v4, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3 +; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2 +; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 +; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16_x2: @@ -8591,10 +8415,8 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) { ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 @@ -8602,9 +8424,9 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) { ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v5, v3, 1 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v4, v2, 1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 @@ -8613,64 +8435,60 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) { ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v1, v5, v3 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v0, v4, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3 -; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v5, 1 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1 -; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v3, v5, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v2, v4, v0 +; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v3, v5, 1 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v2, v4, 1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3 +; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2 +; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4 -; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4 +; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1 +; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16_x2: @@ -8908,24 +8726,14 @@ entry: } define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) { -; GFX67-SDAG-LABEL: multi_use_mul_mad_i16_var: -; GFX67-SDAG: ; %bb.0: ; %entry -; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v1, v2 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v4, v1, v3 -; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX67-GISEL-LABEL: multi_use_mul_mad_i16_var: -; GFX67-GISEL: ; %bb.0: ; %entry -; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v0, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v2 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 -; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX67-LABEL: multi_use_mul_mad_i16_var: +; GFX67: ; %bb.0: ; %entry +; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX67-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-NEXT: v_mad_u32_u24 v0, v4, v1, v2 +; GFX67-NEXT: v_mad_u32_u24 v1, v4, v1, v3 +; GFX67-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: multi_use_mul_mad_i16_var: ; GFX8-SDAG: ; %bb.0: ; %entry @@ -8973,10 +8781,9 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) { ; GFX10-GISEL-LABEL: multi_use_mul_mad_i16_var: ; GFX10-GISEL: ; %bb.0: ; %entry ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v0, v2 -; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, v3 -; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX10-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2 +; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3 +; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -8992,12 +8799,10 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) { ; GFX11-GISEL-LABEL: multi_use_mul_mad_i16_var: ; GFX11-GISEL: ; %bb.0: ; %entry ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v0, v2 -; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, v3 -; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2 +; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -9021,12 +8826,10 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) { ; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 ; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 ; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v0, v2 -; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, v3 -; GFX1200-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1200-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2 +; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3 +; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1200-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2 ; GFX1200-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] entry: @@ -9108,29 +8911,17 @@ entry: } define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) %ptr) { -; GFX67-SDAG-LABEL: other_use_mul_mad_i16_var: -; GFX67-SDAG: ; %bb.0: ; %entry -; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v1 -; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, v2 -; GFX67-SDAG-NEXT: s_mov_b32 m0, -1 -; GFX67-SDAG-NEXT: ds_write_b16 v3, v4 -; GFX67-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX67-GISEL-LABEL: other_use_mul_mad_i16_var: -; GFX67-GISEL: ; %bb.0: ; %entry -; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v0, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v2 -; GFX67-GISEL-NEXT: s_mov_b32 m0, -1 -; GFX67-GISEL-NEXT: ds_write_b16 v3, v1 -; GFX67-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX67-LABEL: other_use_mul_mad_i16_var: +; GFX67: ; %bb.0: ; %entry +; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v1 +; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, v2 +; GFX67-NEXT: s_mov_b32 m0, -1 +; GFX67-NEXT: ds_write_b16 v3, v4 +; GFX67-NEXT: s_waitcnt lgkmcnt(0) +; GFX67-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: other_use_mul_mad_i16_var: ; GFX8: ; %bb.0: ; %entry @@ -9151,69 +8942,36 @@ define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) % ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: other_use_mul_mad_i16_var: -; GFX10-SDAG: ; %bb.0: ; %entry -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mul_lo_u16 v4, v0, v1 -; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2 -; GFX10-SDAG-NEXT: ds_write_b16 v3, v4 -; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: other_use_mul_mad_i16_var: -; GFX10-GISEL: ; %bb.0: ; %entry -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v0, v1 -; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v1, v2 -; GFX10-GISEL-NEXT: ds_write_b16 v3, v1 -; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: other_use_mul_mad_i16_var: -; GFX11-SDAG: ; %bb.0: ; %entry -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_mul_lo_u16 v4, v0, v1 -; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2 -; GFX11-SDAG-NEXT: ds_store_b16 v3, v4 -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: other_use_mul_mad_i16_var: -; GFX11-GISEL: ; %bb.0: ; %entry -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v0, v1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v1, v2 -; GFX11-GISEL-NEXT: ds_store_b16 v3, v1 -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: other_use_mul_mad_i16_var: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_mul_lo_u16 v4, v0, v1 +; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2 +; GFX10-NEXT: ds_write_b16 v3, v4 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX1200-SDAG-LABEL: other_use_mul_mad_i16_var: -; GFX1200-SDAG: ; %bb.0: ; %entry -; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 -; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 -; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 -; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1200-SDAG-NEXT: v_mul_lo_u16 v4, v0, v1 -; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2 -; GFX1200-SDAG-NEXT: ds_store_b16 v3, v4 -; GFX1200-SDAG-NEXT: s_wait_dscnt 0x0 -; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: other_use_mul_mad_i16_var: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_mul_lo_u16 v4, v0, v1 +; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2 +; GFX11-NEXT: ds_store_b16 v3, v4 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] ; -; GFX1200-GISEL-LABEL: other_use_mul_mad_i16_var: -; GFX1200-GISEL: ; %bb.0: ; %entry -; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 -; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 -; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 -; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v0, v1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v1, v2 -; GFX1200-GISEL-NEXT: ds_store_b16 v3, v1 -; GFX1200-GISEL-NEXT: s_wait_dscnt 0x0 -; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1200-LABEL: other_use_mul_mad_i16_var: +; GFX1200: ; %bb.0: ; %entry +; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1200-NEXT: s_wait_expcnt 0x0 +; GFX1200-NEXT: s_wait_samplecnt 0x0 +; GFX1200-NEXT: s_wait_bvhcnt 0x0 +; GFX1200-NEXT: s_wait_kmcnt 0x0 +; GFX1200-NEXT: v_mul_lo_u16 v4, v0, v1 +; GFX1200-NEXT: v_mad_u16 v0, v0, v1, v2 +; GFX1200-NEXT: ds_store_b16 v3, v4 +; GFX1200-NEXT: s_wait_dscnt 0x0 +; GFX1200-NEXT: s_setpc_b64 s[30:31] entry: %mul = mul i16 %x, %y %add0 = add i16 %mul, %z @@ -9246,16 +9004,14 @@ define <4 x i16> @multi_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i ; GFX67-GISEL-LABEL: multi_use_mul_mad_v2i16_var: ; GFX67-GISEL: ; %bb.0: ; %entry ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v0, v2 -; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v0, v1 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v4 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v3, v5 -; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v8, v2, v4 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v9, v3, v5 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v8, v2, v6 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v9, v3, v7 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-SDAG-LABEL: multi_use_mul_mad_v2i16_var: @@ -9366,20 +9122,20 @@ define <2 x i16> @other_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i ; GFX67-GISEL-LABEL: other_use_mul_mad_v2i16_var: ; GFX67-GISEL: ; %bb.0: ; %entry ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2 -; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3 -; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 -; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v1 -; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0 -; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v1, v3 +; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v0, v2 +; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8 +; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7 +; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8 +; GFX67-GISEL-NEXT: v_or_b32_e32 v7, v7, v8 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, v4 +; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, v5 ; GFX67-GISEL-NEXT: s_mov_b32 m0, -1 -; GFX67-GISEL-NEXT: ds_write_b32 v6, v2 +; GFX67-GISEL-NEXT: ds_write_b32 v6, v7 ; GFX67-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -9532,29 +9288,15 @@ define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) { ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX1200-SDAG-LABEL: mul_u24_add64: -; GFX1200-SDAG: ; %bb.0: -; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0 -; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0 -; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0 -; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3] -; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX1200-GISEL-LABEL: mul_u24_add64: -; GFX1200-GISEL: ; %bb.0: -; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0 -; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0 -; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0 -; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1200-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v1 -; GFX1200-GISEL-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v1 -; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1200-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2 -; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1200-LABEL: mul_u24_add64: +; GFX1200: ; %bb.0: +; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1200-NEXT: s_wait_expcnt 0x0 +; GFX1200-NEXT: s_wait_samplecnt 0x0 +; GFX1200-NEXT: s_wait_bvhcnt 0x0 +; GFX1200-NEXT: s_wait_kmcnt 0x0 +; GFX1200-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3] +; GFX1200-NEXT: s_setpc_b64 s[30:31] %mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y) %add = add i64 %mul, %z ret i64 %add diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 83599e789e10b..84f23985b6421 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -1350,13 +1350,10 @@ Expected GlobalISelEmitter::importExplicitUseRenderer( // Handle the case where the MVT/register class is omitted in the dest pattern // but MVT exists in the source pattern. - if (isa(DstChild.getLeafValue())) { - for (const TreePatternNode &SrcChild : Src.children()) { - if (SrcChild.getName() == DstChild.getName()) { - DstMIBuilder.addRenderer(SrcChild.getName()); - return InsertPt; - } - } + if (isa(DstChild.getLeafValue()) && + Rule.hasOperand(DstChild.getName())) { + DstMIBuilder.addRenderer(DstChild.getName()); + return InsertPt; } return failedImport("Dst pattern child is an unsupported kind"); }