Skip to content

Commit bbb1854

Browse files
committed
[LoongArch] Allow f16 codegen with expansion to libcalls
The test case is adapted from llvm/test/CodeGen/RISCV/fp16-promote.ll because it covers some additional IR patterns (f16 load/store, fpext/fptrunc, and f16 arithmetic) that ought to be common. Fixes llvm#93894
1 parent 7eaae4e commit bbb1854

File tree

2 files changed

+334
-0
lines changed

2 files changed

+334
-0
lines changed

Diff for: llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
171171
// Set operations for 'F' feature.
172172

173173
if (Subtarget.hasBasicF()) {
174+
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
175+
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
174176
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
175177

176178
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
@@ -186,6 +188,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
186188
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
187189
setOperationAction(ISD::FPOW, MVT::f32, Expand);
188190
setOperationAction(ISD::FREM, MVT::f32, Expand);
191+
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
192+
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
189193

190194
if (Subtarget.is64Bit())
191195
setOperationAction(ISD::FRINT, MVT::f32, Legal);
@@ -202,7 +206,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
202206
// Set operations for 'D' feature.
203207

204208
if (Subtarget.hasBasicD()) {
209+
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
205210
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
211+
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
206212
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
207213
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
208214

@@ -219,6 +225,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
219225
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
220226
setOperationAction(ISD::FPOW, MVT::f64, Expand);
221227
setOperationAction(ISD::FREM, MVT::f64, Expand);
228+
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
229+
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
222230

223231
if (Subtarget.is64Bit())
224232
setOperationAction(ISD::FRINT, MVT::f64, Legal);

Diff for: llvm/test/CodeGen/LoongArch/fp16-promote.ll

+326
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,326 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
3+
; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
4+
5+
define void @test_load_store(ptr %p, ptr %q) nounwind {
; LA32-LABEL: test_load_store:
; LA32:       # %bb.0:
; LA32-NEXT:    ld.h $a0, $a0, 0
; LA32-NEXT:    st.h $a0, $a1, 0
; LA32-NEXT:    ret
;
; LA64-LABEL: test_load_store:
; LA64:       # %bb.0:
; LA64-NEXT:    ld.h $a0, $a0, 0
; LA64-NEXT:    st.h $a0, $a1, 0
; LA64-NEXT:    ret
; An f16 memory round-trip needs no FP support at all: it is a plain
; 16-bit integer load followed by a 16-bit integer store.
  %val = load half, ptr %p
  store half %val, ptr %q
  ret void
}
21+
22+
define float @test_fpextend_float(ptr %p) nounwind {
; LA32-LABEL: test_fpextend_float:
; LA32:       # %bb.0:
; LA32-NEXT:    ld.hu $a0, $a0, 0
; LA32-NEXT:    b %plt(__gnu_h2f_ieee)
;
; LA64-LABEL: test_fpextend_float:
; LA64:       # %bb.0:
; LA64-NEXT:    ld.hu $a0, $a0, 0
; LA64-NEXT:    b %plt(__gnu_h2f_ieee)
; fpext f16 -> f32 lowers to a zero-extending half-word load plus a
; tail call to the __gnu_h2f_ieee conversion libcall.
  %val = load half, ptr %p
  %ext = fpext half %val to float
  ret float %ext
}
36+
37+
define double @test_fpextend_double(ptr %p) nounwind {
; LA32-LABEL: test_fpextend_double:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    ld.hu $a0, $a0, 0
; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA32-NEXT:    fcvt.d.s $fa0, $fa0
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: test_fpextend_double:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    ld.hu $a0, $a0, 0
; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA64-NEXT:    fcvt.d.s $fa0, $fa0
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
; fpext f16 -> f64 goes through f32: libcall to __gnu_h2f_ieee, then a
; native single-to-double convert.
  %val = load half, ptr %p
  %ext = fpext half %val to double
  ret double %ext
}
63+
64+
define void @test_fptrunc_float(float %f, ptr %p) nounwind {
; LA32-LABEL: test_fptrunc_float:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT:    move $fp, $a0
; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA32-NEXT:    st.h $a0, $fp, 0
; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: test_fptrunc_float:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT:    move $fp, $a0
; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA64-NEXT:    st.h $a0, $fp, 0
; LA64-NEXT:    ld.d $fp, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
; fptrunc f32 -> f16 lowers to a __gnu_f2h_ieee libcall; the result is
; stored with a plain 16-bit integer store.
  %trunc = fptrunc float %f to half
  store half %trunc, ptr %p
  ret void
}
94+
95+
define void @test_fptrunc_double(double %d, ptr %p) nounwind {
; LA32-LABEL: test_fptrunc_double:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT:    move $fp, $a0
; LA32-NEXT:    bl %plt(__truncdfhf2)
; LA32-NEXT:    st.h $a0, $fp, 0
; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: test_fptrunc_double:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT:    move $fp, $a0
; LA64-NEXT:    bl %plt(__truncdfhf2)
; LA64-NEXT:    st.h $a0, $fp, 0
; LA64-NEXT:    ld.d $fp, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
; fptrunc f64 -> f16 uses the dedicated __truncdfhf2 libcall (no
; intermediate f32 round step).
  %trunc = fptrunc double %d to half
  store half %trunc, ptr %p
  ret void
}
125+
126+
define half @test_fadd_reg(half %a, half %b) nounwind {
; LA32-LABEL: test_fadd_reg:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -32
; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
; LA32-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
; LA32-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
; LA32-NEXT:    fmov.s $fs0, $fa0
; LA32-NEXT:    fmov.s $fa0, $fa1
; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA32-NEXT:    fmov.s $fs1, $fa0
; LA32-NEXT:    fmov.s $fa0, $fs0
; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA32-NEXT:    fadd.s $fa0, $fa0, $fs1
; LA32-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
; LA32-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 32
; LA32-NEXT:    ret
;
; LA64-LABEL: test_fadd_reg:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -32
; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
; LA64-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
; LA64-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    fmov.s $fs0, $fa0
; LA64-NEXT:    fmov.s $fa0, $fa1
; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA64-NEXT:    fmov.s $fs1, $fa0
; LA64-NEXT:    fmov.s $fa0, $fs0
; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA64-NEXT:    fadd.s $fa0, $fa0, $fs1
; LA64-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 32
; LA64-NEXT:    ret
; f16 arithmetic on register arguments: each operand is normalized via
; f2h/h2f libcall pairs and the add itself is performed in f32.
  %sum = fadd half %a, %b
  ret half %sum
}
171+
172+
define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
; LA32-LABEL: test_fadd_mem:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -32
; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
; LA32-NEXT:    st.w $s0, $sp, 20 # 4-byte Folded Spill
; LA32-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
; LA32-NEXT:    move $fp, $a1
; LA32-NEXT:    move $s0, $a0
; LA32-NEXT:    ld.hu $a0, $a0, 0
; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA32-NEXT:    fmov.s $fs0, $fa0
; LA32-NEXT:    ld.hu $a0, $fp, 0
; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA32-NEXT:    fadd.s $fa0, $fs0, $fa0
; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA32-NEXT:    st.h $a0, $s0, 0
; LA32-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
; LA32-NEXT:    ld.w $s0, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 32
; LA32-NEXT:    ret
;
; LA64-LABEL: test_fadd_mem:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -32
; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT:    move $fp, $a1
; LA64-NEXT:    move $s0, $a0
; LA64-NEXT:    ld.hu $a0, $a0, 0
; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA64-NEXT:    fmov.s $fs0, $fa0
; LA64-NEXT:    ld.hu $a0, $fp, 0
; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA64-NEXT:    fadd.s $fa0, $fs0, $fa0
; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA64-NEXT:    st.h $a0, $s0, 0
; LA64-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 32
; LA64-NEXT:    ret
; f16 add with memory operands: zero-extending half-word loads feed
; h2f libcalls, the add runs in f32, and f2h converts before the store.
  %x = load half, ptr %p
  %y = load half, ptr %q
  %sum = fadd half %x, %y
  store half %sum, ptr %p
  ret void
}
226+
227+
define half @test_fmul_reg(half %a, half %b) nounwind {
; LA32-LABEL: test_fmul_reg:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -32
; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
; LA32-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
; LA32-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
; LA32-NEXT:    fmov.s $fs0, $fa0
; LA32-NEXT:    fmov.s $fa0, $fa1
; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA32-NEXT:    fmov.s $fs1, $fa0
; LA32-NEXT:    fmov.s $fa0, $fs0
; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA32-NEXT:    fmul.s $fa0, $fa0, $fs1
; LA32-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
; LA32-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 32
; LA32-NEXT:    ret
;
; LA64-LABEL: test_fmul_reg:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -32
; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
; LA64-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
; LA64-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    fmov.s $fs0, $fa0
; LA64-NEXT:    fmov.s $fa0, $fa1
; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA64-NEXT:    fmov.s $fs1, $fa0
; LA64-NEXT:    fmov.s $fa0, $fs0
; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA64-NEXT:    fmul.s $fa0, $fa0, $fs1
; LA64-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 32
; LA64-NEXT:    ret
; Same promotion pattern as test_fadd_reg, exercised with fmul.
  %prod = fmul half %a, %b
  ret half %prod
}
272+
273+
define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
; LA32-LABEL: test_fmul_mem:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -32
; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
; LA32-NEXT:    st.w $s0, $sp, 20 # 4-byte Folded Spill
; LA32-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
; LA32-NEXT:    move $fp, $a1
; LA32-NEXT:    move $s0, $a0
; LA32-NEXT:    ld.hu $a0, $a0, 0
; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA32-NEXT:    fmov.s $fs0, $fa0
; LA32-NEXT:    ld.hu $a0, $fp, 0
; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA32-NEXT:    fmul.s $fa0, $fs0, $fa0
; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA32-NEXT:    st.h $a0, $s0, 0
; LA32-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
; LA32-NEXT:    ld.w $s0, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 32
; LA32-NEXT:    ret
;
; LA64-LABEL: test_fmul_mem:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -32
; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT:    move $fp, $a1
; LA64-NEXT:    move $s0, $a0
; LA64-NEXT:    ld.hu $a0, $a0, 0
; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA64-NEXT:    fmov.s $fs0, $fa0
; LA64-NEXT:    ld.hu $a0, $fp, 0
; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
; LA64-NEXT:    fmul.s $fa0, $fs0, $fa0
; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
; LA64-NEXT:    st.h $a0, $s0, 0
; LA64-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 32
; LA64-NEXT:    ret
; Same promotion pattern as test_fadd_mem, exercised with fmul.
  %x = load half, ptr %p
  %y = load half, ptr %q
  %prod = fmul half %x, %y
  store half %prod, ptr %p
  ret void
}

0 commit comments

Comments
 (0)