@@ -146,54 +146,47 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
146146define <8 x i16 > @v8i16 (i32 %index , i32 %TC , <8 x i16 > %V1 , <8 x i16 > %V2 ) {
147147; CHECK-LABEL: v8i16:
148148; CHECK: @ %bb.0:
149- ; CHECK-NEXT: vpush {d8, d9}
149+ ; CHECK-NEXT: push {r4, lr}
150+ ; CHECK-NEXT: sub sp, #16
150151; CHECK-NEXT: adr.w r12, .LCPI3_0
151152; CHECK-NEXT: vdup.32 q1, r1
152153; CHECK-NEXT: vldrw.u32 q0, [r12]
153- ; CHECK-NEXT: vmov.i8 q2, #0x0
154- ; CHECK-NEXT: vmov.i8 q3, #0xff
154+ ; CHECK-NEXT: vmov.i8 q2, #0xff
155+ ; CHECK-NEXT: mov r4, sp
156+ ; CHECK-NEXT: adr r1, .LCPI3_1
155157; CHECK-NEXT: vqadd.u32 q0, q0, r0
156158; CHECK-NEXT: vcmp.u32 hi, q1, q0
157- ; CHECK-NEXT: vpsel q4, q3, q2
158- ; CHECK-NEXT: vmov r1, r12, d8
159- ; CHECK-NEXT: vmov.16 q0[0], r1
160- ; CHECK-NEXT: vmov.16 q0[1], r12
161- ; CHECK-NEXT: vmov r1, r12, d9
162- ; CHECK-NEXT: vmov.16 q0[2], r1
163- ; CHECK-NEXT: adr r1, .LCPI3_1
164- ; CHECK-NEXT: vldrw.u32 q4, [r1]
165- ; CHECK-NEXT: vmov.16 q0[3], r12
166- ; CHECK-NEXT: vqadd.u32 q4, q4, r0
167- ; CHECK-NEXT: vcmp.u32 hi, q1, q4
168- ; CHECK-NEXT: vpsel q1, q3, q2
169- ; CHECK-NEXT: vmov r0, r1, d2
170- ; CHECK-NEXT: vmov.16 q0[4], r0
171- ; CHECK-NEXT: vmov.16 q0[5], r1
172- ; CHECK-NEXT: vmov r0, r1, d3
173- ; CHECK-NEXT: vmov.16 q0[6], r0
174- ; CHECK-NEXT: add r0, sp, #24
175- ; CHECK-NEXT: vmov.16 q0[7], r1
159+ ; CHECK-NEXT: vmov.i8 q0, #0x0
160+ ; CHECK-NEXT: vpsel q3, q2, q0
161+ ; CHECK-NEXT: vstrh.32 q3, [r4, #8]
162+ ; CHECK-NEXT: vldrw.u32 q3, [r1]
163+ ; CHECK-NEXT: vqadd.u32 q3, q3, r0
164+ ; CHECK-NEXT: add r0, sp, #32
165+ ; CHECK-NEXT: vcmp.u32 hi, q1, q3
176166; CHECK-NEXT: vldrw.u32 q1, [r0]
177- ; CHECK-NEXT: vcmp.i16 ne, q0, zr
178- ; CHECK-NEXT: vldr d1, [sp, #16]
167+ ; CHECK-NEXT: vpsel q0, q2, q0
168+ ; CHECK-NEXT: vstrh.32 q0, [r4]
169+ ; CHECK-NEXT: vldr d1, [sp, #24]
170+ ; CHECK-NEXT: vldrw.u32 q2, [r4]
179171; CHECK-NEXT: vmov d0, r2, r3
172+ ; CHECK-NEXT: vcmp.i16 ne, q2, zr
180173; CHECK-NEXT: vpsel q0, q0, q1
181174; CHECK-NEXT: vmov r0, r1, d0
182175; CHECK-NEXT: vmov r2, r3, d1
183- ; CHECK-NEXT: vpop {d8, d9}
184- ; CHECK-NEXT: bx lr
176+ ; CHECK-NEXT: add sp, #16
177+ ; CHECK-NEXT: pop {r4, pc}
185178; CHECK-NEXT: .p2align 4
186179; CHECK-NEXT: @ %bb.1:
187180; CHECK-NEXT: .LCPI3_0:
188- ; CHECK-NEXT: .long 0 @ 0x0
189- ; CHECK-NEXT: .long 1 @ 0x1
190- ; CHECK-NEXT: .long 2 @ 0x2
191- ; CHECK-NEXT: .long 3 @ 0x3
192- ; CHECK-NEXT: .LCPI3_1:
193181; CHECK-NEXT: .long 4 @ 0x4
194182; CHECK-NEXT: .long 5 @ 0x5
195183; CHECK-NEXT: .long 6 @ 0x6
196184; CHECK-NEXT: .long 7 @ 0x7
185+ ; CHECK-NEXT: .LCPI3_1:
186+ ; CHECK-NEXT: .long 0 @ 0x0
187+ ; CHECK-NEXT: .long 1 @ 0x1
188+ ; CHECK-NEXT: .long 2 @ 0x2
189+ ; CHECK-NEXT: .long 3 @ 0x3
197190 %active.lane.mask = call <8 x i1 > @llvm.get.active.lane.mask.v8i1.i32 (i32 %index , i32 %TC )
198191 %select = select <8 x i1 > %active.lane.mask , <8 x i16 > %V1 , <8 x i16 > %V2
199192 ret <8 x i16 > %select
@@ -202,122 +195,79 @@ define <8 x i16> @v8i16(i32 %index, i32 %TC, <8 x i16> %V1, <8 x i16> %V2) {
202195define <16 x i8 > @v16i8 (i32 %index , i32 %TC , <16 x i8 > %V1 , <16 x i8 > %V2 ) {
203196; CHECK-LABEL: v16i8:
204197; CHECK: @ %bb.0:
205- ; CHECK-NEXT: vpush {d8, d9, d10, d11}
198+ ; CHECK-NEXT: push {r4, r5, r7, lr}
199+ ; CHECK-NEXT: sub sp, #48
206200; CHECK-NEXT: adr.w r12, .LCPI4_0
207- ; CHECK-NEXT: vdup.32 q3 , r1
201+ ; CHECK-NEXT: vdup.32 q2 , r1
208202; CHECK-NEXT: vldrw.u32 q0, [r12]
209203; CHECK-NEXT: vmov.i8 q1, #0xff
204+ ; CHECK-NEXT: add r5, sp, #16
205+ ; CHECK-NEXT: adr r1, .LCPI4_1
210206; CHECK-NEXT: vqadd.u32 q0, q0, r0
211- ; CHECK-NEXT: vcmp.u32 hi, q3, q0
207+ ; CHECK-NEXT: adr r4, .LCPI4_3
208+ ; CHECK-NEXT: vcmp.u32 hi, q2, q0
212209; CHECK-NEXT: vmov.i8 q0, #0x0
213- ; CHECK-NEXT: vpsel q4, q1, q0
214- ; CHECK-NEXT: vmov r1, r12, d8
215- ; CHECK-NEXT: vmov.16 q2[0], r1
216- ; CHECK-NEXT: vmov.16 q2[1], r12
217- ; CHECK-NEXT: vmov r1, r12, d9
218- ; CHECK-NEXT: vmov.16 q2[2], r1
219- ; CHECK-NEXT: adr r1, .LCPI4_1
220- ; CHECK-NEXT: vldrw.u32 q4, [r1]
221- ; CHECK-NEXT: vmov.16 q2[3], r12
222- ; CHECK-NEXT: vqadd.u32 q4, q4, r0
223- ; CHECK-NEXT: vcmp.u32 hi, q3, q4
224- ; CHECK-NEXT: vpsel q4, q1, q0
225- ; CHECK-NEXT: vmov r1, r12, d8
226- ; CHECK-NEXT: vmov.16 q2[4], r1
227- ; CHECK-NEXT: vmov.16 q2[5], r12
228- ; CHECK-NEXT: vmov r1, r12, d9
229- ; CHECK-NEXT: vmov.16 q2[6], r1
230- ; CHECK-NEXT: vmov.16 q2[7], r12
231- ; CHECK-NEXT: vcmp.i16 ne, q2, zr
232- ; CHECK-NEXT: vpsel q4, q1, q0
233- ; CHECK-NEXT: vmov.u16 r1, q4[0]
234- ; CHECK-NEXT: vmov.8 q2[0], r1
235- ; CHECK-NEXT: vmov.u16 r1, q4[1]
236- ; CHECK-NEXT: vmov.8 q2[1], r1
237- ; CHECK-NEXT: vmov.u16 r1, q4[2]
238- ; CHECK-NEXT: vmov.8 q2[2], r1
239- ; CHECK-NEXT: vmov.u16 r1, q4[3]
240- ; CHECK-NEXT: vmov.8 q2[3], r1
241- ; CHECK-NEXT: vmov.u16 r1, q4[4]
242- ; CHECK-NEXT: vmov.8 q2[4], r1
243- ; CHECK-NEXT: vmov.u16 r1, q4[5]
244- ; CHECK-NEXT: vmov.8 q2[5], r1
245- ; CHECK-NEXT: vmov.u16 r1, q4[6]
246- ; CHECK-NEXT: vmov.8 q2[6], r1
247- ; CHECK-NEXT: vmov.u16 r1, q4[7]
248- ; CHECK-NEXT: vmov.8 q2[7], r1
210+ ; CHECK-NEXT: vpsel q3, q1, q0
211+ ; CHECK-NEXT: vstrh.32 q3, [r5, #8]
212+ ; CHECK-NEXT: vldrw.u32 q3, [r1]
249213; CHECK-NEXT: adr r1, .LCPI4_2
250- ; CHECK-NEXT: vldrw.u32 q4, [r1]
251- ; CHECK-NEXT: vqadd.u32 q4, q4, r0
252- ; CHECK-NEXT: vcmp.u32 hi, q3, q4
253- ; CHECK-NEXT: vpsel q5, q1, q0
254- ; CHECK-NEXT: vmov r1, r12, d10
255- ; CHECK-NEXT: vmov.16 q4[0], r1
256- ; CHECK-NEXT: vmov.16 q4[1], r12
257- ; CHECK-NEXT: vmov r1, r12, d11
258- ; CHECK-NEXT: vmov.16 q4[2], r1
259- ; CHECK-NEXT: adr r1, .LCPI4_3
260- ; CHECK-NEXT: vldrw.u32 q5, [r1]
261- ; CHECK-NEXT: vmov.16 q4[3], r12
262- ; CHECK-NEXT: vqadd.u32 q5, q5, r0
263- ; CHECK-NEXT: vcmp.u32 hi, q3, q5
214+ ; CHECK-NEXT: vqadd.u32 q3, q3, r0
215+ ; CHECK-NEXT: vcmp.u32 hi, q2, q3
216+ ; CHECK-NEXT: vpsel q3, q1, q0
217+ ; CHECK-NEXT: vstrh.32 q3, [r5]
218+ ; CHECK-NEXT: vldrw.u32 q3, [r1]
219+ ; CHECK-NEXT: mov r1, sp
220+ ; CHECK-NEXT: vqadd.u32 q3, q3, r0
221+ ; CHECK-NEXT: vcmp.u32 hi, q2, q3
264222; CHECK-NEXT: vpsel q3, q1, q0
265- ; CHECK-NEXT: vmov r0, r1, d6
266- ; CHECK-NEXT: vmov.16 q4[4], r0
267- ; CHECK-NEXT: vmov.16 q4[5], r1
268- ; CHECK-NEXT: vmov r0, r1, d7
269- ; CHECK-NEXT: vmov.16 q4[6], r0
270- ; CHECK-NEXT: vmov.16 q4[7], r1
271- ; CHECK-NEXT: vcmp.i16 ne, q4, zr
223+ ; CHECK-NEXT: vstrh.32 q3, [r1, #8]
224+ ; CHECK-NEXT: vldrw.u32 q3, [r4]
225+ ; CHECK-NEXT: vqadd.u32 q3, q3, r0
226+ ; CHECK-NEXT: add r0, sp, #32
227+ ; CHECK-NEXT: vcmp.u32 hi, q2, q3
228+ ; CHECK-NEXT: vpsel q2, q1, q0
229+ ; CHECK-NEXT: vstrh.32 q2, [r1]
230+ ; CHECK-NEXT: vldrw.u32 q2, [r5]
231+ ; CHECK-NEXT: vcmp.i16 ne, q2, zr
232+ ; CHECK-NEXT: vpsel q2, q1, q0
233+ ; CHECK-NEXT: vstrb.16 q2, [r0, #8]
234+ ; CHECK-NEXT: vldrw.u32 q2, [r1]
235+ ; CHECK-NEXT: add r1, sp, #72
236+ ; CHECK-NEXT: vcmp.i16 ne, q2, zr
272237; CHECK-NEXT: vpsel q0, q1, q0
273- ; CHECK-NEXT: vmov.u16 r0, q0[0]
274- ; CHECK-NEXT: vmov.8 q2[8], r0
275- ; CHECK-NEXT: vmov.u16 r0, q0[1]
276- ; CHECK-NEXT: vmov.8 q2[9], r0
277- ; CHECK-NEXT: vmov.u16 r0, q0[2]
278- ; CHECK-NEXT: vmov.8 q2[10], r0
279- ; CHECK-NEXT: vmov.u16 r0, q0[3]
280- ; CHECK-NEXT: vmov.8 q2[11], r0
281- ; CHECK-NEXT: vmov.u16 r0, q0[4]
282- ; CHECK-NEXT: vmov.8 q2[12], r0
283- ; CHECK-NEXT: vmov.u16 r0, q0[5]
284- ; CHECK-NEXT: vmov.8 q2[13], r0
285- ; CHECK-NEXT: vmov.u16 r0, q0[6]
286- ; CHECK-NEXT: vmov.8 q2[14], r0
287- ; CHECK-NEXT: vmov.u16 r0, q0[7]
288- ; CHECK-NEXT: vmov.8 q2[15], r0
289- ; CHECK-NEXT: add r0, sp, #40
290- ; CHECK-NEXT: vldr d1, [sp, #32]
291- ; CHECK-NEXT: vldrw.u32 q1, [r0]
292- ; CHECK-NEXT: vcmp.i8 ne, q2, zr
238+ ; CHECK-NEXT: vldrw.u32 q1, [r1]
239+ ; CHECK-NEXT: vstrb.16 q0, [r0]
240+ ; CHECK-NEXT: vldr d1, [sp, #64]
241+ ; CHECK-NEXT: vldrw.u32 q2, [r0]
293242; CHECK-NEXT: vmov d0, r2, r3
243+ ; CHECK-NEXT: vcmp.i8 ne, q2, zr
294244; CHECK-NEXT: vpsel q0, q0, q1
295245; CHECK-NEXT: vmov r0, r1, d0
296246; CHECK-NEXT: vmov r2, r3, d1
297- ; CHECK-NEXT: vpop {d8, d9, d10, d11}
298- ; CHECK-NEXT: bx lr
247+ ; CHECK-NEXT: add sp, #48
248+ ; CHECK-NEXT: pop {r4, r5, r7, pc}
299249; CHECK-NEXT: .p2align 4
300250; CHECK-NEXT: @ %bb.1:
301251; CHECK-NEXT: .LCPI4_0:
302- ; CHECK-NEXT: .long 0 @ 0x0
303- ; CHECK-NEXT: .long 1 @ 0x1
304- ; CHECK-NEXT: .long 2 @ 0x2
305- ; CHECK-NEXT: .long 3 @ 0x3
252+ ; CHECK-NEXT: .long 12 @ 0xc
253+ ; CHECK-NEXT: .long 13 @ 0xd
254+ ; CHECK-NEXT: .long 14 @ 0xe
255+ ; CHECK-NEXT: .long 15 @ 0xf
306256; CHECK-NEXT: .LCPI4_1:
307- ; CHECK-NEXT: .long 4 @ 0x4
308- ; CHECK-NEXT: .long 5 @ 0x5
309- ; CHECK-NEXT: .long 6 @ 0x6
310- ; CHECK-NEXT: .long 7 @ 0x7
311- ; CHECK-NEXT: .LCPI4_2:
312257; CHECK-NEXT: .long 8 @ 0x8
313258; CHECK-NEXT: .long 9 @ 0x9
314259; CHECK-NEXT: .long 10 @ 0xa
315260; CHECK-NEXT: .long 11 @ 0xb
261+ ; CHECK-NEXT: .LCPI4_2:
262+ ; CHECK-NEXT: .long 4 @ 0x4
263+ ; CHECK-NEXT: .long 5 @ 0x5
264+ ; CHECK-NEXT: .long 6 @ 0x6
265+ ; CHECK-NEXT: .long 7 @ 0x7
316266; CHECK-NEXT: .LCPI4_3:
317- ; CHECK-NEXT: .long 12 @ 0xc
318- ; CHECK-NEXT: .long 13 @ 0xd
319- ; CHECK-NEXT: .long 14 @ 0xe
320- ; CHECK-NEXT: .long 15 @ 0xf
267+ ; CHECK-NEXT: .long 0 @ 0x0
268+ ; CHECK-NEXT: .long 1 @ 0x1
269+ ; CHECK-NEXT: .long 2 @ 0x2
270+ ; CHECK-NEXT: .long 3 @ 0x3
321271 %active.lane.mask = call <16 x i1 > @llvm.get.active.lane.mask.v16i1.i32 (i32 %index , i32 %TC )
322272 %select = select <16 x i1 > %active.lane.mask , <16 x i8 > %V1 , <16 x i8 > %V2
323273 ret <16 x i8 > %select
0 commit comments