|
1 |
| -; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s |
2 |
| -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s |
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s --check-prefixes=CHECK,CHECK32 |
| 3 | +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s --check-prefixes=CHECK,CHECK64 |
3 | 4 | ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify %}
|
4 | 5 | ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify %}
|
5 | 6 |
|
6 |
| -; CHECK-LABEL: test_fabsf( |
7 | 7 | define float @test_fabsf(float %f) {
|
8 |
| -; CHECK: abs.f32 |
| 8 | +; CHECK-LABEL: test_fabsf( |
| 9 | +; CHECK: { |
| 10 | +; CHECK-NEXT: .reg .f32 %f<3>; |
| 11 | +; CHECK-EMPTY: |
| 12 | +; CHECK-NEXT: // %bb.0: |
| 13 | +; CHECK-NEXT: ld.param.f32 %f1, [test_fabsf_param_0]; |
| 14 | +; CHECK-NEXT: abs.f32 %f2, %f1; |
| 15 | +; CHECK-NEXT: st.param.f32 [func_retval0], %f2; |
| 16 | +; CHECK-NEXT: ret; |
9 | 17 | %x = call float @llvm.fabs.f32(float %f)
|
10 | 18 | ret float %x
|
11 | 19 | }
|
12 | 20 |
|
13 |
| -; CHECK-LABEL: test_fabs( |
14 | 21 | define double @test_fabs(double %d) {
|
15 |
| -; CHECK: abs.f64 |
| 22 | +; CHECK-LABEL: test_fabs( |
| 23 | +; CHECK: { |
| 24 | +; CHECK-NEXT: .reg .f64 %fd<3>; |
| 25 | +; CHECK-EMPTY: |
| 26 | +; CHECK-NEXT: // %bb.0: |
| 27 | +; CHECK-NEXT: ld.param.f64 %fd1, [test_fabs_param_0]; |
| 28 | +; CHECK-NEXT: abs.f64 %fd2, %fd1; |
| 29 | +; CHECK-NEXT: st.param.f64 [func_retval0], %fd2; |
| 30 | +; CHECK-NEXT: ret; |
16 | 31 | %x = call double @llvm.fabs.f64(double %d)
|
17 | 32 | ret double %x
|
18 | 33 | }
|
19 | 34 |
|
20 |
| -; CHECK-LABEL: test_nvvm_sqrt( |
21 | 35 | define float @test_nvvm_sqrt(float %a) {
|
22 |
| -; CHECK: sqrt.rn.f32 |
| 36 | +; CHECK-LABEL: test_nvvm_sqrt( |
| 37 | +; CHECK: { |
| 38 | +; CHECK-NEXT: .reg .f32 %f<3>; |
| 39 | +; CHECK-EMPTY: |
| 40 | +; CHECK-NEXT: // %bb.0: |
| 41 | +; CHECK-NEXT: ld.param.f32 %f1, [test_nvvm_sqrt_param_0]; |
| 42 | +; CHECK-NEXT: sqrt.rn.f32 %f2, %f1; |
| 43 | +; CHECK-NEXT: st.param.f32 [func_retval0], %f2; |
| 44 | +; CHECK-NEXT: ret; |
23 | 45 | %val = call float @llvm.nvvm.sqrt.f(float %a)
|
24 | 46 | ret float %val
|
25 | 47 | }
|
26 | 48 |
|
27 |
| -; CHECK-LABEL: test_llvm_sqrt( |
28 | 49 | define float @test_llvm_sqrt(float %a) {
|
29 |
| -; CHECK: sqrt.rn.f32 |
| 50 | +; CHECK-LABEL: test_llvm_sqrt( |
| 51 | +; CHECK: { |
| 52 | +; CHECK-NEXT: .reg .f32 %f<3>; |
| 53 | +; CHECK-EMPTY: |
| 54 | +; CHECK-NEXT: // %bb.0: |
| 55 | +; CHECK-NEXT: ld.param.f32 %f1, [test_llvm_sqrt_param_0]; |
| 56 | +; CHECK-NEXT: sqrt.rn.f32 %f2, %f1; |
| 57 | +; CHECK-NEXT: st.param.f32 [func_retval0], %f2; |
| 58 | +; CHECK-NEXT: ret; |
30 | 59 | %val = call float @llvm.sqrt.f32(float %a)
|
31 | 60 | ret float %val
|
32 | 61 | }
|
33 | 62 |
|
34 |
| -; CHECK-LABEL: test_bitreverse32( |
35 | 63 | define i32 @test_bitreverse32(i32 %a) {
|
36 |
| -; CHECK: brev.b32 |
| 64 | +; CHECK-LABEL: test_bitreverse32( |
| 65 | +; CHECK: { |
| 66 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 67 | +; CHECK-EMPTY: |
| 68 | +; CHECK-NEXT: // %bb.0: |
| 69 | +; CHECK-NEXT: ld.param.u32 %r1, [test_bitreverse32_param_0]; |
| 70 | +; CHECK-NEXT: brev.b32 %r2, %r1; |
| 71 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| 72 | +; CHECK-NEXT: ret; |
37 | 73 | %val = call i32 @llvm.bitreverse.i32(i32 %a)
|
38 | 74 | ret i32 %val
|
39 | 75 | }
|
40 | 76 |
|
41 |
| -; CHECK-LABEL: test_bitreverse64( |
42 | 77 | define i64 @test_bitreverse64(i64 %a) {
|
43 |
| -; CHECK: brev.b64 |
| 78 | +; CHECK-LABEL: test_bitreverse64( |
| 79 | +; CHECK: { |
| 80 | +; CHECK-NEXT: .reg .b64 %rd<3>; |
| 81 | +; CHECK-EMPTY: |
| 82 | +; CHECK-NEXT: // %bb.0: |
| 83 | +; CHECK-NEXT: ld.param.u64 %rd1, [test_bitreverse64_param_0]; |
| 84 | +; CHECK-NEXT: brev.b64 %rd2, %rd1; |
| 85 | +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; |
| 86 | +; CHECK-NEXT: ret; |
44 | 87 | %val = call i64 @llvm.bitreverse.i64(i64 %a)
|
45 | 88 | ret i64 %val
|
46 | 89 | }
|
47 | 90 |
|
48 |
| -; CHECK-LABEL: test_popc32( |
49 | 91 | define i32 @test_popc32(i32 %a) {
|
50 |
| -; CHECK: popc.b32 |
| 92 | +; CHECK-LABEL: test_popc32( |
| 93 | +; CHECK: { |
| 94 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 95 | +; CHECK-EMPTY: |
| 96 | +; CHECK-NEXT: // %bb.0: |
| 97 | +; CHECK-NEXT: ld.param.u32 %r1, [test_popc32_param_0]; |
| 98 | +; CHECK-NEXT: popc.b32 %r2, %r1; |
| 99 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| 100 | +; CHECK-NEXT: ret; |
51 | 101 | %val = call i32 @llvm.ctpop.i32(i32 %a)
|
52 | 102 | ret i32 %val
|
53 | 103 | }
|
54 | 104 |
|
55 |
| -; CHECK-LABEL: test_popc64 |
56 | 105 | define i64 @test_popc64(i64 %a) {
|
57 |
| -; CHECK: popc.b64 |
58 |
| -; CHECK: cvt.u64.u32 |
| 106 | +; CHECK-LABEL: test_popc64( |
| 107 | +; CHECK: { |
| 108 | +; CHECK-NEXT: .reg .b32 %r<2>; |
| 109 | +; CHECK-NEXT: .reg .b64 %rd<3>; |
| 110 | +; CHECK-EMPTY: |
| 111 | +; CHECK-NEXT: // %bb.0: |
| 112 | +; CHECK-NEXT: ld.param.u64 %rd1, [test_popc64_param_0]; |
| 113 | +; CHECK-NEXT: popc.b64 %r1, %rd1; |
| 114 | +; CHECK-NEXT: cvt.u64.u32 %rd2, %r1; |
| 115 | +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; |
| 116 | +; CHECK-NEXT: ret; |
59 | 117 | %val = call i64 @llvm.ctpop.i64(i64 %a)
|
60 | 118 | ret i64 %val
|
61 | 119 | }
|
62 | 120 |
|
63 | 121 | ; NVPTX popc.b64 returns an i32 even though @llvm.ctpop.i64 returns an i64, so
|
64 | 122 | ; if this function returns an i32, there's no need to do any type conversions
|
65 | 123 | ; in the ptx.
|
66 |
| -; CHECK-LABEL: test_popc64_trunc |
67 | 124 | define i32 @test_popc64_trunc(i64 %a) {
|
68 |
| -; CHECK: popc.b64 |
69 |
| -; CHECK-NOT: cvt. |
| 125 | +; CHECK-LABEL: test_popc64_trunc( |
| 126 | +; CHECK: { |
| 127 | +; CHECK-NEXT: .reg .b32 %r<2>; |
| 128 | +; CHECK-NEXT: .reg .b64 %rd<2>; |
| 129 | +; CHECK-EMPTY: |
| 130 | +; CHECK-NEXT: // %bb.0: |
| 131 | +; CHECK-NEXT: ld.param.u64 %rd1, [test_popc64_trunc_param_0]; |
| 132 | +; CHECK-NEXT: popc.b64 %r1, %rd1; |
| 133 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; |
| 134 | +; CHECK-NEXT: ret; |
70 | 135 | %val = call i64 @llvm.ctpop.i64(i64 %a)
|
71 | 136 | %trunc = trunc i64 %val to i32
|
72 | 137 | ret i32 %trunc
|
73 | 138 | }
|
74 | 139 |
|
75 | 140 | ; llvm.ctpop.i16 is implemented by converting to i32, running popc.b32, and
|
76 | 141 | ; then converting back to i16.
|
77 |
| -; CHECK-LABEL: test_popc16 |
78 | 142 | define void @test_popc16(i16 %a, ptr %b) {
|
79 |
| -; CHECK: cvt.u32.u16 |
80 |
| -; CHECK: popc.b32 |
81 |
| -; CHECK: cvt.u16.u32 |
| 143 | +; CHECK32-LABEL: test_popc16( |
| 144 | +; CHECK32: { |
| 145 | +; CHECK32-NEXT: .reg .b32 %r<4>; |
| 146 | +; CHECK32-EMPTY: |
| 147 | +; CHECK32-NEXT: // %bb.0: |
| 148 | +; CHECK32-NEXT: ld.param.u16 %r1, [test_popc16_param_0]; |
| 149 | +; CHECK32-NEXT: popc.b32 %r2, %r1; |
| 150 | +; CHECK32-NEXT: ld.param.u32 %r3, [test_popc16_param_1]; |
| 151 | +; CHECK32-NEXT: st.u16 [%r3], %r2; |
| 152 | +; CHECK32-NEXT: ret; |
| 153 | +; |
| 154 | +; CHECK64-LABEL: test_popc16( |
| 155 | +; CHECK64: { |
| 156 | +; CHECK64-NEXT: .reg .b32 %r<3>; |
| 157 | +; CHECK64-NEXT: .reg .b64 %rd<2>; |
| 158 | +; CHECK64-EMPTY: |
| 159 | +; CHECK64-NEXT: // %bb.0: |
| 160 | +; CHECK64-NEXT: ld.param.u16 %r1, [test_popc16_param_0]; |
| 161 | +; CHECK64-NEXT: popc.b32 %r2, %r1; |
| 162 | +; CHECK64-NEXT: ld.param.u64 %rd1, [test_popc16_param_1]; |
| 163 | +; CHECK64-NEXT: st.u16 [%rd1], %r2; |
| 164 | +; CHECK64-NEXT: ret; |
82 | 165 | %val = call i16 @llvm.ctpop.i16(i16 %a)
|
83 | 166 | store i16 %val, ptr %b
|
84 | 167 | ret void
|
85 | 168 | }
|
86 | 169 |
|
87 | 170 | ; If we call llvm.ctpop.i16 and then zext the result to i32, we shouldn't need
|
88 | 171 | ; to do any conversions after calling popc.b32, because that returns an i32.
|
89 |
| -; CHECK-LABEL: test_popc16_to_32 |
90 | 172 | define i32 @test_popc16_to_32(i16 %a) {
|
91 |
| -; CHECK: cvt.u32.u16 |
92 |
| -; CHECK: popc.b32 |
93 |
| -; CHECK-NOT: cvt. |
| 173 | +; CHECK-LABEL: test_popc16_to_32( |
| 174 | +; CHECK: { |
| 175 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 176 | +; CHECK-EMPTY: |
| 177 | +; CHECK-NEXT: // %bb.0: |
| 178 | +; CHECK-NEXT: ld.param.u16 %r1, [test_popc16_to_32_param_0]; |
| 179 | +; CHECK-NEXT: popc.b32 %r2, %r1; |
| 180 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| 181 | +; CHECK-NEXT: ret; |
94 | 182 | %val = call i16 @llvm.ctpop.i16(i16 %a)
|
95 | 183 | %zext = zext i16 %val to i32
|
96 | 184 | ret i32 %zext
|
97 | 185 | }
|
98 | 186 |
|
99 | 187 | ; Most of the nvvm.read.ptx.sreg.* intrinsics always return the same value and may
|
100 | 188 | ; be CSE'd.
|
101 |
| -; CHECK-LABEL: test_tid |
102 | 189 | define i32 @test_tid() {
|
103 |
| -; CHECK: mov.u32 %r{{.*}}, %tid.x; |
| 190 | +; CHECK-LABEL: test_tid( |
| 191 | +; CHECK: { |
| 192 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 193 | +; CHECK-EMPTY: |
| 194 | +; CHECK-NEXT: // %bb.0: |
| 195 | +; CHECK-NEXT: mov.u32 %r1, %tid.x; |
| 196 | +; CHECK-NEXT: add.s32 %r2, %r1, %r1; |
| 197 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| 198 | +; CHECK-NEXT: ret; |
104 | 199 | %a = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
105 |
| -; CHECK-NOT: mov.u32 %r{{.*}}, %tid.x; |
106 | 200 | %b = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
107 | 201 | %ret = add i32 %a, %b
|
108 |
| -; CHECK: ret |
109 | 202 | ret i32 %ret
|
110 | 203 | }
|
111 | 204 |
|
112 | 205 | ; Reading clock() or clock64() should not be CSE'd as each read may return
|
113 | 206 | ; a different value.
|
114 |
| -; CHECK-LABEL: test_clock |
115 | 207 | define i32 @test_clock() {
|
116 |
| -; CHECK: mov.u32 %r{{.*}}, %clock; |
| 208 | +; CHECK-LABEL: test_clock( |
| 209 | +; CHECK: { |
| 210 | +; CHECK-NEXT: .reg .b32 %r<4>; |
| 211 | +; CHECK-EMPTY: |
| 212 | +; CHECK-NEXT: // %bb.0: |
| 213 | +; CHECK-NEXT: mov.u32 %r1, %clock; |
| 214 | +; CHECK-NEXT: mov.u32 %r2, %clock; |
| 215 | +; CHECK-NEXT: add.s32 %r3, %r1, %r2; |
| 216 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| 217 | +; CHECK-NEXT: ret; |
117 | 218 | %a = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
|
118 |
| -; CHECK: mov.u32 %r{{.*}}, %clock; |
119 | 219 | %b = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
|
120 | 220 | %ret = add i32 %a, %b
|
121 |
| -; CHECK: ret |
122 | 221 | ret i32 %ret
|
123 | 222 | }
|
124 | 223 |
|
125 |
| -; CHECK-LABEL: test_clock64 |
126 | 224 | define i64 @test_clock64() {
|
127 |
| -; CHECK: mov.u64 %r{{.*}}, %clock64; |
| 225 | +; CHECK-LABEL: test_clock64( |
| 226 | +; CHECK: { |
| 227 | +; CHECK-NEXT: .reg .b64 %rd<4>; |
| 228 | +; CHECK-EMPTY: |
| 229 | +; CHECK-NEXT: // %bb.0: |
| 230 | +; CHECK-NEXT: mov.u64 %rd1, %clock64; |
| 231 | +; CHECK-NEXT: mov.u64 %rd2, %clock64; |
| 232 | +; CHECK-NEXT: add.s64 %rd3, %rd1, %rd2; |
| 233 | +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| 234 | +; CHECK-NEXT: ret; |
128 | 235 | %a = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
|
129 |
| -; CHECK: mov.u64 %r{{.*}}, %clock64; |
130 | 236 | %b = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
|
131 | 237 | %ret = add i64 %a, %b
|
132 |
| -; CHECK: ret |
133 | 238 | ret i64 %ret
|
134 | 239 | }
|
135 | 240 |
|
136 |
| -; CHECK-LABEL: test_exit |
137 | 241 | define void @test_exit() {
|
138 |
| -; CHECK: exit; |
| 242 | +; CHECK-LABEL: test_exit( |
| 243 | +; CHECK: { |
| 244 | +; CHECK-EMPTY: |
| 245 | +; CHECK-EMPTY: |
| 246 | +; CHECK-NEXT: // %bb.0: |
| 247 | +; CHECK-NEXT: exit; |
| 248 | +; CHECK-NEXT: ret; |
139 | 249 | call void @llvm.nvvm.exit()
|
140 | 250 | ret void
|
141 | 251 | }
|
142 | 252 |
|
143 |
| -; CHECK-LABEL: test_globaltimer |
144 | 253 | define i64 @test_globaltimer() {
|
145 |
| -; CHECK: mov.u64 %r{{.*}}, %globaltimer; |
| 254 | +; CHECK-LABEL: test_globaltimer( |
| 255 | +; CHECK: { |
| 256 | +; CHECK-NEXT: .reg .b64 %rd<4>; |
| 257 | +; CHECK-EMPTY: |
| 258 | +; CHECK-NEXT: // %bb.0: |
| 259 | +; CHECK-NEXT: mov.u64 %rd1, %globaltimer; |
| 260 | +; CHECK-NEXT: mov.u64 %rd2, %globaltimer; |
| 261 | +; CHECK-NEXT: add.s64 %rd3, %rd1, %rd2; |
| 262 | +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| 263 | +; CHECK-NEXT: ret; |
146 | 264 | %a = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
|
147 |
| -; CHECK: mov.u64 %r{{.*}}, %globaltimer; |
148 | 265 | %b = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
|
149 | 266 | %ret = add i64 %a, %b
|
150 |
| -; CHECK: ret |
151 | 267 | ret i64 %ret
|
152 | 268 | }
|
153 | 269 |
|
154 |
| -; CHECK-LABEL: test_cyclecounter |
155 | 270 | define i64 @test_cyclecounter() {
|
156 |
| -; CHECK: mov.u64 %r{{.*}}, %clock64; |
| 271 | +; CHECK-LABEL: test_cyclecounter( |
| 272 | +; CHECK: { |
| 273 | +; CHECK-NEXT: .reg .b64 %rd<4>; |
| 274 | +; CHECK-EMPTY: |
| 275 | +; CHECK-NEXT: // %bb.0: |
| 276 | +; CHECK-NEXT: mov.u64 %rd1, %clock64; |
| 277 | +; CHECK-NEXT: mov.u64 %rd2, %clock64; |
| 278 | +; CHECK-NEXT: add.s64 %rd3, %rd1, %rd2; |
| 279 | +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| 280 | +; CHECK-NEXT: ret; |
157 | 281 | %a = tail call i64 @llvm.readcyclecounter()
|
158 |
| -; CHECK: mov.u64 %r{{.*}}, %clock64; |
159 | 282 | %b = tail call i64 @llvm.readcyclecounter()
|
160 | 283 | %ret = add i64 %a, %b
|
161 |
| -; CHECK: ret |
162 | 284 | ret i64 %ret
|
163 | 285 | }
|
164 | 286 |
|
165 |
| -; CHECK-LABEL: test_steadycounter |
166 | 287 | define i64 @test_steadycounter() {
|
167 |
| -; CHECK: mov.u64 %r{{.*}}, %globaltimer; |
| 288 | +; CHECK-LABEL: test_steadycounter( |
| 289 | +; CHECK: { |
| 290 | +; CHECK-NEXT: .reg .b64 %rd<4>; |
| 291 | +; CHECK-EMPTY: |
| 292 | +; CHECK-NEXT: // %bb.0: |
| 293 | +; CHECK-NEXT: mov.u64 %rd1, %globaltimer; |
| 294 | +; CHECK-NEXT: mov.u64 %rd2, %globaltimer; |
| 295 | +; CHECK-NEXT: add.s64 %rd3, %rd1, %rd2; |
| 296 | +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| 297 | +; CHECK-NEXT: ret; |
168 | 298 | %a = tail call i64 @llvm.readsteadycounter()
|
169 |
| -; CHECK: mov.u64 %r{{.*}}, %globaltimer; |
170 | 299 | %b = tail call i64 @llvm.readsteadycounter()
|
171 | 300 | %ret = add i64 %a, %b
|
172 |
| -; CHECK: ret |
173 | 301 | ret i64 %ret
|
174 | 302 | }
|
175 | 303 |
|
|
0 commit comments