Conversation

@Pierre-vh (Contributor)

This has very little effect on codegen in practice, but it is a nice-to-have, I think.

See #68315

@llvmbot added the backend:AMDGPU and llvm:SelectionDAG labels on Oct 17, 2023
@llvmbot (Member) commented on Oct 17, 2023

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-selectiondag

Author: Pierre van Houtryve (Pierre-vh)

Changes

This has very little effect on codegen in practice, but it is a nice-to-have, I think.

See #68315


Full diff: https://github.com/llvm/llvm-project/pull/69324.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+19)
  • (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+7-2)
  • (modified) llvm/test/CodeGen/AMDGPU/udiv.ll (+8-27)
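
As an editorial illustration of the semantics the SelectionDAG.cpp hunk below implements (not part of the patch; the variable names and values are mine): an FMAD constant fold rounds after the multiply and again after the add, while an FMA fold uses a single fused rounding, so the two can produce different constants.

#include "llvm/ADT/APFloat.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // 4097 * 4097 = 16785409 is not exactly representable in fp32; rounded on
  // its own it becomes 16785408, so the separately-rounded (FMAD) result
  // differs from the fused (FMA) result.
  APFloat A(4097.0f), B(4097.0f), C(-16785408.0f);

  // FMAD-style fold: round after the multiply, then round again after the add.
  APFloat Mad = A;
  Mad.multiply(B, APFloat::rmNearestTiesToEven);
  Mad.add(C, APFloat::rmNearestTiesToEven);

  // FMA-style fold: a single fused rounding.
  APFloat Fma = A;
  Fma.fusedMultiplyAdd(B, C, APFloat::rmNearestTiesToEven);

  outs() << Mad.convertToFloat() << "\n"; // 0 (product rounded to 16785408 first)
  outs() << Fma.convertToFloat() << "\n"; // 1 (exact product kept until the add)
  return 0;
}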
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 20ad4c766a1a3fc..eac0a14d8303fa4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -495,6 +495,7 @@ namespace {
     SDValue visitFSUB(SDNode *N);
     SDValue visitFMUL(SDNode *N);
     template <class MatchContextClass> SDValue visitFMA(SDNode *N);
+    SDValue visitFMAD(SDNode *N);
     SDValue visitFDIV(SDNode *N);
     SDValue visitFREM(SDNode *N);
     SDValue visitFSQRT(SDNode *N);
@@ -2000,6 +2001,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::FSUB:               return visitFSUB(N);
   case ISD::FMUL:               return visitFMUL(N);
   case ISD::FMA:                return visitFMA<EmptyMatchContext>(N);
+  case ISD::FMAD:
+    return visitFMAD(N);
   case ISD::FDIV:               return visitFDIV(N);
   case ISD::FREM:               return visitFREM(N);
   case ISD::FSQRT:              return visitFSQRT(N);
@@ -16752,6 +16755,22 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
   return SDValue();
 }
 
+SDValue DAGCombiner::visitFMAD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // Constant fold FMAD.
+  if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
+      isa<ConstantFPSDNode>(N2)) {
+    return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
+  }
+
+  return SDValue();
+}
+
 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
 // reciprocal.
 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3f06d0bd4eaa1d5..b028c483718107e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7069,7 +7069,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
          "Operand is DELETED_NODE!");
   // Perform various simplifications.
   switch (Opcode) {
-  case ISD::FMA: {
+  case ISD::FMA:
+  case ISD::FMAD: {
     assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
     assert(N1.getValueType() == VT && N2.getValueType() == VT &&
            N3.getValueType() == VT && "FMA types must match!");
@@ -7080,7 +7081,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       APFloat  V1 = N1CFP->getValueAPF();
       const APFloat &V2 = N2CFP->getValueAPF();
       const APFloat &V3 = N3CFP->getValueAPF();
-      V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+      if (Opcode == ISD::FMAD) {
+        V1.multiply(V2, APFloat::rmNearestTiesToEven);
+        V1.add(V3, APFloat::rmNearestTiesToEven);
+      } else
+        V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
       return getConstantFP(V1, DL, VT);
     }
     break;
diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index 012b3f976734dec..e554f912ff64886 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -2619,39 +2619,20 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
 ; VI-LABEL: v_test_udiv64_mulhi_fold:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v2, 0x4f800000
-; VI-NEXT:    v_madak_f32 v2, 0, v2, 0x47c35000
-; VI-NEXT:    v_rcp_f32_e32 v2, v2
+; VI-NEXT:    v_mov_b32_e32 v4, 0xa7c5
+; VI-NEXT:    v_mul_u32_u24_e32 v3, 0x500, v4
+; VI-NEXT:    v_mul_hi_u32_u24_e32 v2, 0x500, v4
+; VI-NEXT:    v_add_u32_e32 v3, vcc, 0x4237, v3
+; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v2, vcc
+; VI-NEXT:    v_add_u32_e32 v6, vcc, 0xa9000000, v3
 ; VI-NEXT:    s_mov_b32 s6, 0xfffe7960
-; VI-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; VI-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
-; VI-NEXT:    v_trunc_f32_e32 v3, v3
-; VI-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
-; VI-NEXT:    v_cvt_u32_f32_e32 v6, v2
-; VI-NEXT:    v_cvt_u32_f32_e32 v7, v3
-; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
-; VI-NEXT:    v_mul_lo_u32 v4, v7, s6
-; VI-NEXT:    v_sub_u32_e32 v3, vcc, v3, v6
-; VI-NEXT:    v_add_u32_e32 v8, vcc, v3, v4
-; VI-NEXT:    v_mul_hi_u32 v5, v6, v2
-; VI-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0
-; VI-NEXT:    v_add_u32_e32 v9, vcc, v5, v3
-; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0
-; VI-NEXT:    v_addc_u32_e32 v10, vcc, 0, v4, vcc
-; VI-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v7, v8, 0
-; VI-NEXT:    v_add_u32_e32 v2, vcc, v9, v2
-; VI-NEXT:    v_addc_u32_e32 v2, vcc, v10, v3, vcc
-; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v5, vcc
-; VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
-; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; VI-NEXT:    v_add_u32_e32 v6, vcc, v6, v2
-; VI-NEXT:    v_addc_u32_e32 v7, vcc, v7, v3, vcc
 ; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
+; VI-NEXT:    v_addc_u32_e32 v7, vcc, v5, v4, vcc
 ; VI-NEXT:    v_mul_lo_u32 v4, v7, s6
 ; VI-NEXT:    v_sub_u32_e32 v3, vcc, v3, v6
+; VI-NEXT:    v_mul_hi_u32 v8, v6, v2
 ; VI-NEXT:    v_add_u32_e32 v5, vcc, v4, v3
 ; VI-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
-; VI-NEXT:    v_mul_hi_u32 v8, v6, v2
 ; VI-NEXT:    v_add_u32_e32 v8, vcc, v8, v3
 ; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0
 ; VI-NEXT:    v_addc_u32_e32 v9, vcc, 0, v4, vcc

Comment on the new constant-fold check in visitFMAD (DAGCombiner.cpp):

SDLoc DL(N);

// Constant fold FMAD.
if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
Contributor

Should maybe use DAG.isConstantFPBuildVectorOrConstantFP? But I see the FMA case does not.
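
For illustration, an editorial sketch of what that suggestion could look like inside visitFMAD (not what the patch landed with; it reuses the N0/N1/N2/DL/VT locals from the hunk above):

// Hypothetical variant of the constant-fold guard using the DAG predicate
// suggested above, so splat/build_vector FP constants are caught as well.
// Note the getNode fold added by this patch only handles scalar
// ConstantFPSDNode operands, so vector constants would still need folding
// support there to actually simplify.
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
    DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
    DAG.isConstantFPBuildVectorOrConstantFP(N2))
  return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);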

Collaborator

I've been hoping to convert SelectionDAG::foldConstantFPMath to take an ArrayRef<SDValue> Ops argument instead of assuming a binop, but have never gotten around to it.
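
For context, a sketch of the signature change being described (a hypothetical declaration, not code from this PR):

// foldConstantFPMath currently assumes exactly two operands; an
// ArrayRef-based form could also serve ternary FP nodes such as FMA/FMAD.
SDValue foldConstantFPMath(unsigned Opcode, const SDLoc &DL, EVT VT,
                           ArrayRef<SDValue> Ops);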

@jayfoad (Contributor) left a comment:

LGTM.

github-actions bot commented on Oct 17, 2023

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 90576084c1d797f845055e8d95c2d9f455268841 c015d9e2e37a56a189f4f70d951eb8c61c71ad55 -- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cec75b9331d6..dc0ea25f7b93 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2001,7 +2001,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::FSUB:               return visitFSUB(N);
   case ISD::FMUL:               return visitFMUL(N);
   case ISD::FMA:                return visitFMA<EmptyMatchContext>(N);
-  case ISD::FMAD:               return visitFMAD(N);
+  case ISD::FMAD:
+    return visitFMAD(N);
   case ISD::FDIV:               return visitFDIV(N);
   case ISD::FREM:               return visitFREM(N);
   case ISD::FSQRT:              return visitFSQRT(N);

Comment on lines +2623 to +2624
; VI-NEXT: v_mul_u32_u24_e32 v3, 0x500, v4
; VI-NEXT: v_mul_hi_u32_u24_e32 v2, 0x500, v4
Collaborator

Can we now fold these two guys as well?

Collaborator

There is already computeKnownBits handling for MUL_U24/MUL_I24 - but it might not be handling the entire constant fold, it looks to be mainly setting the leading/trailing known bits.
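
As an editorial aside, the two multiplies in question take constant operands (0x500 and 0xa7c5), so a complete constant fold would reduce both to immediates. A quick standalone check of the values (names and layout are mine, not from the review thread):

#include <cstdint>
#include <cstdio>

int main() {
  // Both operands fit in 24 bits, so the u24 multiply matches a plain
  // 32x32->64 multiply of the constants.
  uint64_t Product = uint64_t(0x500) * uint64_t(0xa7c5);
  std::printf("lo = 0x%llx\n", (unsigned long long)(Product & 0xffffffff)); // 0x346d900
  std::printf("hi = 0x%llx\n", (unsigned long long)(Product >> 32));        // 0x0
  return 0;
}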

@Pierre-vh Pierre-vh merged commit c464fea into llvm:main Oct 18, 2023
@Pierre-vh Pierre-vh deleted the dag-fold-fmad branch October 19, 2023 10:15
Guzhu-AMD pushed a commit to GPUOpen-Drivers/llvm-project that referenced this pull request Oct 20, 2023
Local branch amd-gfx d5cba0d Merged main:ed1d29028492 into amd-gfx:ece3beeeb394
Remote branch main c464fea [DAG] Constant fold FMAD (llvm#69324)