Skip to content

Commit 47ed921

Browse files
authored
[AMDGPU] Add legality check when folding short 64-bit literals (#69391)
A 64-bit literal can only be folded if it fits into 32 bits. I believe this has not been triggered so far because we generally do not select 64-bit literals.
1 parent bf7a826 commit 47ed921

File tree

2 files changed

+137
-0
lines changed

2 files changed

+137
-0
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5490,6 +5490,18 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
54905490
return true;
54915491
}
54925492

5493+
if (MO->isImm()) {
5494+
uint64_t Imm = MO->getImm();
5495+
bool Is64BitFPOp = OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_FP64;
5496+
bool Is64BitOp = Is64BitFPOp ||
5497+
OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
5498+
OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2INT32 ||
5499+
OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
5500+
if (Is64BitOp && !AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
5501+
!AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm()))
5502+
return false;
5503+
}
5504+
54935505
// Handle non-register types that are treated like immediates.
54945506
assert(MO->isImm() || MO->isTargetIndex() || MO->isFI() || MO->isGlobal());
54955507

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass=si-fold-operands -o - %s | FileCheck --check-prefix=GCN %s
3+
4+
---
5+
name: no_fold_fp_64bit_literal_sgpr
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
10+
; GCN-LABEL: name: no_fold_fp_64bit_literal_sgpr
11+
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
12+
; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1311768467750121200
13+
; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = V_ADD_F64_e64 0, [[S_MOV_B64_]], 0, [[DEF]], 0, 0, implicit $mode, implicit $exec
14+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_ADD_F64_e64_]]
15+
%0:vreg_64 = IMPLICIT_DEF
16+
%1:sreg_64 = S_MOV_B64 1311768467750121200
17+
%2:vreg_64 = V_ADD_F64_e64 0, %1, 0, %0, 0, 0, implicit $mode, implicit $exec
18+
SI_RETURN_TO_EPILOG %2
19+
...
20+
21+
---
22+
name: no_fold_fp_64bit_literal_vgpr
23+
tracksRegLiveness: true
24+
body: |
25+
bb.0:
26+
27+
; GCN-LABEL: name: no_fold_fp_64bit_literal_vgpr
28+
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
29+
; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
30+
; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = V_ADD_F64_e64 0, [[V_MOV_B]], 0, [[DEF]], 0, 0, implicit $mode, implicit $exec
31+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_ADD_F64_e64_]]
32+
%0:vreg_64 = IMPLICIT_DEF
33+
%1:vreg_64 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
34+
%2:vreg_64 = V_ADD_F64_e64 0, %1, 0, %0, 0, 0, implicit $mode, implicit $exec
35+
SI_RETURN_TO_EPILOG %2
36+
...
37+
38+
---
39+
name: fold_fp_32bit_literal_sgpr
40+
tracksRegLiveness: true
41+
body: |
42+
bb.0:
43+
44+
; GCN-LABEL: name: fold_fp_32bit_literal_sgpr
45+
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
46+
; GCN-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64 = V_ADD_F64_e64 0, 4636737291354636288, 0, [[DEF]], 0, 0, implicit $mode, implicit $exec
47+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_ADD_F64_e64_]]
48+
%0:vreg_64 = IMPLICIT_DEF
49+
%1:sreg_64 = S_MOV_B64 4636737291354636288
50+
%2:vreg_64 = V_ADD_F64_e64 0, %1, 0, %0, 0, 0, implicit $mode, implicit $exec
51+
SI_RETURN_TO_EPILOG %2
52+
...
53+
54+
---
55+
name: no_fold_int_64bit_literal_sgpr
56+
tracksRegLiveness: true
57+
body: |
58+
bb.0:
59+
60+
; GCN-LABEL: name: no_fold_int_64bit_literal_sgpr
61+
; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
62+
; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1311768467750121200
63+
; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[DEF]], [[S_MOV_B64_]], implicit-def $scc
64+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_AND_B64_]]
65+
%0:sreg_64 = IMPLICIT_DEF
66+
%1:sreg_64 = S_MOV_B64 1311768467750121200
67+
%2:sreg_64 = S_AND_B64 %0, %1, implicit-def $scc
68+
SI_RETURN_TO_EPILOG %2
69+
...
70+
71+
---
72+
name: fold_int_32bit_literal_sgpr
73+
tracksRegLiveness: true
74+
body: |
75+
bb.0:
76+
77+
; GCN-LABEL: name: fold_int_32bit_literal_sgpr
78+
; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
79+
; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[DEF]], 2147483647, implicit-def $scc
80+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_AND_B64_]]
81+
%0:sreg_64 = IMPLICIT_DEF
82+
%1:sreg_64 = S_MOV_B64 2147483647
83+
%2:sreg_64 = S_AND_B64 %0, %1, implicit-def $scc
84+
SI_RETURN_TO_EPILOG %2
85+
...
86+
87+
---
88+
name: fold_uint_32bit_literal_sgpr
89+
tracksRegLiveness: true
90+
body: |
91+
bb.0:
92+
93+
; GCN-LABEL: name: fold_uint_32bit_literal_sgpr
94+
; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
95+
; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[DEF]], 4294967295, implicit-def $scc
96+
; GCN-NEXT: SI_RETURN_TO_EPILOG [[S_AND_B64_]]
97+
%0:sreg_64 = IMPLICIT_DEF
98+
%1:sreg_64 = S_MOV_B64 4294967295
99+
%2:sreg_64 = S_AND_B64 %0, %1, implicit-def $scc
100+
SI_RETURN_TO_EPILOG %2
101+
...
102+
103+
---
104+
name: no_fold_v2fp_64bit_literal_sgpr
105+
tracksRegLiveness: true
106+
body: |
107+
bb.0:
108+
109+
%0:vreg_64 = IMPLICIT_DEF
110+
%1:vreg_64 = V_MOV_B64_PSEUDO 4629700418019000320, implicit $exec
111+
%2:vreg_64 = V_PK_ADD_F32 0, %0, 0, %1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
112+
SI_RETURN_TO_EPILOG %2
113+
...
114+
115+
---
116+
name: fold_v2fp_32bit_literal_sgpr
117+
tracksRegLiveness: true
118+
body: |
119+
bb.0:
120+
121+
%0:vreg_64 = IMPLICIT_DEF
122+
%1:vreg_64 = V_MOV_B64_PSEUDO 1065353216, implicit $exec
123+
%2:vreg_64 = V_PK_ADD_F32 0, %0, 0, %1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
124+
SI_RETURN_TO_EPILOG %2
125+
...

0 commit comments

Comments
 (0)