Skip to content

Commit 2bc9358

Browse files
authored
[DAG] Constant Folding for U/SMUL_LOHI (#69437)
1 parent f3b20cb commit 2bc9358

File tree

3 files changed

+55
-24
lines changed

3 files changed

+55
-24
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5351,6 +5351,10 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
53515351
EVT VT = N->getValueType(0);
53525352
SDLoc DL(N);
53535353

5354+
// Constant fold.
5355+
if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5356+
return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5357+
53545358
// canonicalize constant to RHS (vector doesn't have to splat)
53555359
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
53565360
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
@@ -5389,6 +5393,10 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
53895393
EVT VT = N->getValueType(0);
53905394
SDLoc DL(N);
53915395

5396+
// Constant fold.
5397+
if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5398+
return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5399+
53925400
// canonicalize constant to RHS (vector doesn't have to splat)
53935401
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
53945402
!DAG.isConstantIntBuildVectorOrConstantInt(N1))

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9903,6 +9903,28 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
99039903
VTList.VTs[0] == Ops[0].getValueType() &&
99049904
VTList.VTs[0] == Ops[1].getValueType() &&
99059905
"Binary operator types must match!");
9906+
// Constant fold.
9907+
ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(Ops[0]);
9908+
ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ops[1]);
9909+
if (LHS && RHS) {
9910+
unsigned Width = VTList.VTs[0].getScalarSizeInBits();
9911+
unsigned OutWidth = Width * 2;
9912+
APInt Val = LHS->getAPIntValue();
9913+
APInt Mul = RHS->getAPIntValue();
9914+
if (Opcode == ISD::SMUL_LOHI) {
9915+
Val = Val.sext(OutWidth);
9916+
Mul = Mul.sext(OutWidth);
9917+
} else {
9918+
Val = Val.zext(OutWidth);
9919+
Mul = Mul.zext(OutWidth);
9920+
}
9921+
Val *= Mul;
9922+
9923+
SDValue Hi =
9924+
getConstant(Val.extractBits(Width, Width), DL, VTList.VTs[0]);
9925+
SDValue Lo = getConstant(Val.trunc(Width), DL, VTList.VTs[0]);
9926+
return getNode(ISD::MERGE_VALUES, DL, VTList, {Lo, Hi}, Flags);
9927+
}
99069928
break;
99079929
}
99089930
case ISD::FFREXP: {

llvm/test/CodeGen/AMDGPU/udiv.ll

Lines changed: 25 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -2598,37 +2598,38 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
25982598
; VI-LABEL: v_test_udiv64_mulhi_fold:
25992599
; VI: ; %bb.0:
26002600
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2601-
; VI-NEXT: v_mov_b32_e32 v4, 0xa7c5
2602-
; VI-NEXT: v_mul_u32_u24_e32 v3, 0x500, v4
2603-
; VI-NEXT: v_mul_hi_u32_u24_e32 v2, 0x500, v4
2604-
; VI-NEXT: v_add_u32_e32 v3, vcc, 0x4237, v3
2605-
; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v2, vcc
2606-
; VI-NEXT: v_add_u32_e32 v6, vcc, 0xa9000000, v3
2607-
; VI-NEXT: s_mov_b32 s6, 0xfffe7960
2608-
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
2609-
; VI-NEXT: v_addc_u32_e32 v7, vcc, v5, v4, vcc
2610-
; VI-NEXT: v_mul_lo_u32 v4, v7, s6
2601+
; VI-NEXT: s_mov_b32 s4, 0x346d900
2602+
; VI-NEXT: s_add_u32 s4, 0x4237, s4
2603+
; VI-NEXT: v_mov_b32_e32 v2, 0xa9000000
2604+
; VI-NEXT: v_add_u32_e32 v6, vcc, s4, v2
2605+
; VI-NEXT: s_mov_b32 s4, 0xfffe7960
2606+
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s4, 0
2607+
; VI-NEXT: s_addc_u32 s6, 0, 0
2608+
; VI-NEXT: s_cmp_lg_u64 vcc, 0
2609+
; VI-NEXT: s_addc_u32 s6, s6, 0xa7c5
2610+
; VI-NEXT: s_mul_i32 s4, s6, 0xfffe7960
26112611
; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v6
2612-
; VI-NEXT: v_mul_hi_u32 v8, v6, v2
2613-
; VI-NEXT: v_add_u32_e32 v5, vcc, v4, v3
2612+
; VI-NEXT: v_add_u32_e32 v5, vcc, s4, v3
26142613
; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
2615-
; VI-NEXT: v_add_u32_e32 v8, vcc, v8, v3
2616-
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0
2617-
; VI-NEXT: v_addc_u32_e32 v9, vcc, 0, v4, vcc
2618-
; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v5, 0
2619-
; VI-NEXT: v_add_u32_e32 v2, vcc, v8, v2
2620-
; VI-NEXT: v_addc_u32_e32 v2, vcc, v9, v3, vcc
2614+
; VI-NEXT: v_mul_hi_u32 v7, v6, v2
2615+
; VI-NEXT: v_add_u32_e32 v7, vcc, v7, v3
2616+
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s6, v2, 0
2617+
; VI-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc
2618+
; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v5, 0
2619+
; VI-NEXT: v_add_u32_e32 v2, vcc, v7, v2
2620+
; VI-NEXT: v_addc_u32_e32 v2, vcc, v8, v3, vcc
26212621
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v5, vcc
26222622
; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v4
26232623
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2624-
; VI-NEXT: v_add_u32_e32 v4, vcc, v6, v2
2625-
; VI-NEXT: v_addc_u32_e32 v5, vcc, v7, v3, vcc
2626-
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v5, 0
2627-
; VI-NEXT: v_mul_hi_u32 v6, v0, v4
2624+
; VI-NEXT: v_mov_b32_e32 v4, s6
2625+
; VI-NEXT: v_add_u32_e32 v5, vcc, v6, v2
2626+
; VI-NEXT: v_addc_u32_e32 v4, vcc, v4, v3, vcc
2627+
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v4, 0
2628+
; VI-NEXT: v_mul_hi_u32 v6, v0, v5
26282629
; VI-NEXT: v_add_u32_e32 v6, vcc, v6, v2
26292630
; VI-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc
2630-
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, v4, 0
2631-
; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v5, 0
2631+
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, v5, 0
2632+
; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v4, 0
26322633
; VI-NEXT: v_add_u32_e32 v2, vcc, v6, v2
26332634
; VI-NEXT: v_addc_u32_e32 v2, vcc, v7, v3, vcc
26342635
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v5, vcc

0 commit comments

Comments
 (0)