Skip to content

Commit ef1eb03

Browse files
authored
[AArch64] Add neon bici test for haddu and shadd (#84073)
Add neon bici test for haddu and shadd, prerequisite for #76644
1 parent 881df55 commit ef1eb03

File tree

1 file changed

+129
-0
lines changed

1 file changed

+129
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
3+
4+
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
5+
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
6+
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
7+
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
8+
9+
; Zero-extends both <8 x i8> arguments to <8 x i16>, passes them to the
; llvm.aarch64.neon.uhadd intrinsic and masks every lane of the result with 511.
; The autogenerated assertions below expect two ushll, one uhadd, a bic with
; #254, lsl #8, and ret.
; NOTE(review): with zero-extended i8 inputs the halving add presumably fits in
; 8 bits, which would make the 511 mask redundant - confirm whether the bic is
; meant to be folded away by a follow-up change.
define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
10+
; CHECK-LABEL: haddu_zext:
11+
; CHECK: // %bb.0:
12+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
13+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
14+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
15+
; CHECK-NEXT: bic v0.8h, #254, lsl #8
16+
; CHECK-NEXT: ret
17+
%x0 = zext <8 x i8> %a0 to <8 x i16>
18+
%x1 = zext <8 x i8> %a1 to <8 x i16>
19+
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
20+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511,i16 511, i16 511, i16 511, i16 511>
21+
ret <8 x i16> %res
22+
}
23+
24+
; Zero-extends both <8 x i8> arguments to <8 x i16>, passes them to the
; llvm.aarch64.neon.urhadd intrinsic and masks every lane of the result with 511.
; The autogenerated assertions below expect two ushll, one urhadd, a bic with
; #254, lsl #8, and ret.
; NOTE(review): the sign-extended sibling of this test is named urhadd_sext,
; while this one is named rhaddu_zext - confirm whether the naming difference
; is intentional.
define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
25+
; CHECK-LABEL: rhaddu_zext:
26+
; CHECK: // %bb.0:
27+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
28+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
29+
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
30+
; CHECK-NEXT: bic v0.8h, #254, lsl #8
31+
; CHECK-NEXT: ret
32+
%x0 = zext <8 x i8> %a0 to <8 x i16>
33+
%x1 = zext <8 x i8> %a1 to <8 x i16>
34+
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
35+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
36+
ret <8 x i16> %res
37+
}
38+
39+
; Zero-extends both <8 x i8> arguments to <8 x i16>, passes them to the
; llvm.aarch64.neon.shadd intrinsic and masks every lane of the result with 511.
; The autogenerated assertions below expect two ushll, one shadd, a bic with
; #254, lsl #8, and ret.
define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
40+
; CHECK-LABEL: hadds_zext:
41+
; CHECK: // %bb.0:
42+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
43+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
44+
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
45+
; CHECK-NEXT: bic v0.8h, #254, lsl #8
46+
; CHECK-NEXT: ret
47+
%x0 = zext <8 x i8> %a0 to <8 x i16>
48+
%x1 = zext <8 x i8> %a1 to <8 x i16>
49+
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
50+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
51+
ret <8 x i16> %res
52+
}
53+
54+
; Zero-extends both <8 x i8> arguments to <8 x i16>, passes them to the
; llvm.aarch64.neon.srhadd intrinsic and masks every lane of the result with 511.
; The autogenerated assertions below expect two ushll, one srhadd, a bic with
; #254, lsl #8, and ret.
; NOTE(review): the function name says "shaddu" but the intrinsic called is
; srhadd (the plain shadd case is already covered by hadds_zext) - confirm
; whether the name should reflect the rounding signed variant.
define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
55+
; CHECK-LABEL: shaddu_zext:
56+
; CHECK: // %bb.0:
57+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
58+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
59+
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
60+
; CHECK-NEXT: bic v0.8h, #254, lsl #8
61+
; CHECK-NEXT: ret
62+
%x0 = zext <8 x i8> %a0 to <8 x i16>
63+
%x1 = zext <8 x i8> %a1 to <8 x i16>
64+
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
65+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
66+
ret <8 x i16> %res
67+
}
68+
69+
; negative tests
70+
71+
; Sign-extends both <8 x i8> arguments to <8 x i16>, passes them to the
; llvm.aarch64.neon.uhadd intrinsic and masks every lane of the result with 511.
; The autogenerated assertions below expect two sshll, one uhadd, a bic with
; #254, lsl #8, and ret.
; This function sits in the part of the file marked as negative tests.
define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
72+
; CHECK-LABEL: haddu_sext:
73+
; CHECK: // %bb.0:
74+
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
75+
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
76+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
77+
; CHECK-NEXT: bic v0.8h, #254, lsl #8
78+
; CHECK-NEXT: ret
79+
%x0 = sext <8 x i8> %a0 to <8 x i16>
80+
%x1 = sext <8 x i8> %a1 to <8 x i16>
81+
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
82+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511,i16 511, i16 511, i16 511, i16 511>
83+
ret <8 x i16> %res
84+
}
85+
86+
; Sign-extends both <8 x i8> arguments to <8 x i16>, passes them to the
; llvm.aarch64.neon.urhadd intrinsic and masks every lane of the result with 511.
; The autogenerated assertions below expect two sshll, one urhadd, a bic with
; #254, lsl #8, and ret.
; This function sits in the part of the file marked as negative tests.
define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
87+
; CHECK-LABEL: urhadd_sext:
88+
; CHECK: // %bb.0:
89+
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
90+
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
91+
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
92+
; CHECK-NEXT: bic v0.8h, #254, lsl #8
93+
; CHECK-NEXT: ret
94+
%x0 = sext <8 x i8> %a0 to <8 x i16>
95+
%x1 = sext <8 x i8> %a1 to <8 x i16>
96+
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
97+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511,i16 511, i16 511, i16 511, i16 511>
98+
ret <8 x i16> %res
99+
}
100+
101+
; Sign-extends both <8 x i8> arguments to <8 x i16>, passes them to the
; llvm.aarch64.neon.shadd intrinsic and masks every lane of the result with 511.
; The autogenerated assertions below expect two sshll, one shadd, a bic with
; #254, lsl #8, and ret.
; This function sits in the part of the file marked as negative tests.
define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
102+
; CHECK-LABEL: hadds_sext:
103+
; CHECK: // %bb.0:
104+
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
105+
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
106+
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
107+
; CHECK-NEXT: bic v0.8h, #254, lsl #8
108+
; CHECK-NEXT: ret
109+
%x0 = sext <8 x i8> %a0 to <8 x i16>
110+
%x1 = sext <8 x i8> %a1 to <8 x i16>
111+
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
112+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
113+
ret <8 x i16> %res
114+
}
115+
116+
; Sign-extends both <8 x i8> arguments to <8 x i16>, passes them to the
; llvm.aarch64.neon.srhadd intrinsic and masks every lane of the result with 511.
; The autogenerated assertions below expect two sshll, one srhadd, a bic with
; #254, lsl #8, and ret.
; This function sits in the part of the file marked as negative tests.
; NOTE(review): the function name says "shaddu" but the intrinsic called is
; srhadd (the plain shadd case is already covered by hadds_sext) - confirm
; whether the name should reflect the rounding signed variant.
define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
117+
; CHECK-LABEL: shaddu_sext:
118+
; CHECK: // %bb.0:
119+
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
120+
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
121+
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
122+
; CHECK-NEXT: bic v0.8h, #254, lsl #8
123+
; CHECK-NEXT: ret
124+
%x0 = sext <8 x i8> %a0 to <8 x i16>
125+
%x1 = sext <8 x i8> %a1 to <8 x i16>
126+
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
127+
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
128+
ret <8 x i16> %res
129+
}

0 commit comments

Comments
 (0)