Skip to content

Commit 3dff4f5

Browse files
committed
[ARM] Add extra vabd, vhadd and vmulh tests. NFC
This is some extra testing for vabd, vhadd and vmulh. Some of the tests have also been reordered.
1 parent 74b98ab commit 3dff4f5

File tree

3 files changed

+808
-327
lines changed

3 files changed

+808
-327
lines changed

llvm/test/CodeGen/Thumb2/mve-vabdus.ll

Lines changed: 221 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve %s -o - | FileCheck %s
33

4-
define arm_aapcs_vfpcc <16 x i8> @vabd_s8(<16 x i8> %src1, <16 x i8> %src2) {
5-
; CHECK-LABEL: vabd_s8:
4+
define arm_aapcs_vfpcc <16 x i8> @vabd_v16s8(<16 x i8> %src1, <16 x i8> %src2) {
5+
; CHECK-LABEL: vabd_v16s8:
66
; CHECK: @ %bb.0:
77
; CHECK-NEXT: vabd.s8 q0, q0, q1
88
; CHECK-NEXT: bx lr
@@ -16,8 +16,46 @@ define arm_aapcs_vfpcc <16 x i8> @vabd_s8(<16 x i8> %src1, <16 x i8> %src2) {
1616
ret <16 x i8> %result
1717
}
1818

19-
define arm_aapcs_vfpcc <8 x i16> @vabd_s16(<8 x i16> %src1, <8 x i16> %src2) {
20-
; CHECK-LABEL: vabd_s16:
19+
; Signed absolute difference of two <8 x i8> vectors, expressed as:
; sext both operands to i16, subtract, pick x or (0 - x) via icmp sge/select,
; then truncate back to i8.
; The CHECK lines expect the operands to be widened with vmovlb.s8 and the
; difference computed with vsub.i16 + vabs.s16, i.e. no vabd instruction.
; Note: %add1 holds the difference and %add2 its negation, despite the names.
define arm_aapcs_vfpcc <8 x i8> @vabd_v8s8(<8 x i8> %src1, <8 x i8> %src2) {
20+
; CHECK-LABEL: vabd_v8s8:
21+
; CHECK: @ %bb.0:
22+
; CHECK-NEXT: vmovlb.s8 q1, q1
23+
; CHECK-NEXT: vmovlb.s8 q0, q0
24+
; CHECK-NEXT: vsub.i16 q0, q0, q1
25+
; CHECK-NEXT: vabs.s16 q0, q0
26+
; CHECK-NEXT: bx lr
27+
%sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
28+
%sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
29+
%add1 = sub <8 x i16> %sextsrc1, %sextsrc2
30+
%add2 = sub <8 x i16> zeroinitializer, %add1
31+
%c = icmp sge <8 x i16> %add1, zeroinitializer
32+
%s = select <8 x i1> %c, <8 x i16> %add1, <8 x i16> %add2
33+
%result = trunc <8 x i16> %s to <8 x i8>
34+
ret <8 x i8> %result
35+
}
36+
37+
; Signed absolute difference of two <4 x i8> vectors, expressed as:
; sext both operands to i16, subtract, pick x or (0 - x) via icmp sge/select,
; then truncate back to i8.
; Although the IR only widens to i16, the CHECK lines expect two widening
; steps per operand (vmovlb.s8 then vmovlb.s16) and the subtract/abs to be
; done on 32-bit lanes (vsub.i32 + vabs.s32); no vabd instruction is expected.
; Note: %add1 holds the difference and %add2 its negation, despite the names.
define arm_aapcs_vfpcc <4 x i8> @vabd_v4s8(<4 x i8> %src1, <4 x i8> %src2) {
38+
; CHECK-LABEL: vabd_v4s8:
39+
; CHECK: @ %bb.0:
40+
; CHECK-NEXT: vmovlb.s8 q1, q1
41+
; CHECK-NEXT: vmovlb.s8 q0, q0
42+
; CHECK-NEXT: vmovlb.s16 q1, q1
43+
; CHECK-NEXT: vmovlb.s16 q0, q0
44+
; CHECK-NEXT: vsub.i32 q0, q0, q1
45+
; CHECK-NEXT: vabs.s32 q0, q0
46+
; CHECK-NEXT: bx lr
47+
%sextsrc1 = sext <4 x i8> %src1 to <4 x i16>
48+
%sextsrc2 = sext <4 x i8> %src2 to <4 x i16>
49+
%add1 = sub <4 x i16> %sextsrc1, %sextsrc2
50+
%add2 = sub <4 x i16> zeroinitializer, %add1
51+
%c = icmp sge <4 x i16> %add1, zeroinitializer
52+
%s = select <4 x i1> %c, <4 x i16> %add1, <4 x i16> %add2
53+
%result = trunc <4 x i16> %s to <4 x i8>
54+
ret <4 x i8> %result
55+
}
56+
57+
define arm_aapcs_vfpcc <8 x i16> @vabd_v8s16(<8 x i16> %src1, <8 x i16> %src2) {
58+
; CHECK-LABEL: vabd_v8s16:
2159
; CHECK: @ %bb.0:
2260
; CHECK-NEXT: vabd.s16 q0, q0, q1
2361
; CHECK-NEXT: bx lr
@@ -31,8 +69,26 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_s16(<8 x i16> %src1, <8 x i16> %src2) {
3169
ret <8 x i16> %result
3270
}
3371

34-
define arm_aapcs_vfpcc <4 x i32> @vabd_s32(<4 x i32> %src1, <4 x i32> %src2) {
35-
; CHECK-LABEL: vabd_s32:
72+
; Signed absolute difference of two <4 x i16> vectors, expressed as:
; sext both operands to i32, subtract, pick x or (0 - x) via icmp sge/select,
; then truncate back to i16.
; The CHECK lines expect the operands to be widened with vmovlb.s16 and the
; difference computed with vsub.i32 + vabs.s32, i.e. no vabd instruction.
; Note: %add1 holds the difference and %add2 its negation, despite the names.
define arm_aapcs_vfpcc <4 x i16> @vabd_v4s16(<4 x i16> %src1, <4 x i16> %src2) {
73+
; CHECK-LABEL: vabd_v4s16:
74+
; CHECK: @ %bb.0:
75+
; CHECK-NEXT: vmovlb.s16 q1, q1
76+
; CHECK-NEXT: vmovlb.s16 q0, q0
77+
; CHECK-NEXT: vsub.i32 q0, q0, q1
78+
; CHECK-NEXT: vabs.s32 q0, q0
79+
; CHECK-NEXT: bx lr
80+
%sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
81+
%sextsrc2 = sext <4 x i16> %src2 to <4 x i32>
82+
%add1 = sub <4 x i32> %sextsrc1, %sextsrc2
83+
%add2 = sub <4 x i32> zeroinitializer, %add1
84+
%c = icmp sge <4 x i32> %add1, zeroinitializer
85+
%s = select <4 x i1> %c, <4 x i32> %add1, <4 x i32> %add2
86+
%result = trunc <4 x i32> %s to <4 x i16>
87+
ret <4 x i16> %result
88+
}
89+
90+
define arm_aapcs_vfpcc <4 x i32> @vabd_v4s32(<4 x i32> %src1, <4 x i32> %src2) {
91+
; CHECK-LABEL: vabd_v4s32:
3692
; CHECK: @ %bb.0:
3793
; CHECK-NEXT: vabd.s32 q0, q0, q1
3894
; CHECK-NEXT: bx lr
@@ -46,8 +102,44 @@ define arm_aapcs_vfpcc <4 x i32> @vabd_s32(<4 x i32> %src1, <4 x i32> %src2) {
46102
ret <4 x i32> %result
47103
}
48104

49-
define arm_aapcs_vfpcc <16 x i8> @vabd_u8(<16 x i8> %src1, <16 x i8> %src2) {
50-
; CHECK-LABEL: vabd_u8:
105+
; Signed absolute difference of two <2 x i32> vectors, expressed as:
; sext both operands to i64, subtract, pick x or (0 - x) via icmp sge/select,
; then truncate back to i32.
; The CHECK lines contain no vector arithmetic: lanes are moved to core
; registers (vmov rN, sM), each 64-bit subtract is a subs/sbc pair, the
; absolute value is formed with adds/adc + eor against the sign (asr #31),
; and the results are re-inserted with vmov q0[..], q0[..]. r7/lr are
; saved and restored around the sequence.
; Note: %add1 holds the difference and %add2 its negation, despite the names.
define arm_aapcs_vfpcc <2 x i32> @vabd_v2s32(<2 x i32> %src1, <2 x i32> %src2) {
106+
; CHECK-LABEL: vabd_v2s32:
107+
; CHECK: @ %bb.0:
108+
; CHECK-NEXT: .save {r7, lr}
109+
; CHECK-NEXT: push {r7, lr}
110+
; CHECK-NEXT: vmov r0, s2
111+
; CHECK-NEXT: vmov r2, s6
112+
; CHECK-NEXT: vmov r3, s0
113+
; CHECK-NEXT: asrs r1, r0, #31
114+
; CHECK-NEXT: subs r0, r0, r2
115+
; CHECK-NEXT: sbc.w r1, r1, r2, asr #31
116+
; CHECK-NEXT: asrs r2, r3, #31
117+
; CHECK-NEXT: adds.w r0, r0, r1, asr #31
118+
; CHECK-NEXT: eor.w lr, r0, r1, asr #31
119+
; CHECK-NEXT: vmov r0, s4
120+
; CHECK-NEXT: adc.w r12, r1, r1, asr #31
121+
; CHECK-NEXT: eor.w r1, r12, r1, asr #31
122+
; CHECK-NEXT: subs r3, r3, r0
123+
; CHECK-NEXT: sbc.w r0, r2, r0, asr #31
124+
; CHECK-NEXT: adds.w r2, r3, r0, asr #31
125+
; CHECK-NEXT: eor.w r2, r2, r0, asr #31
126+
; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
127+
; CHECK-NEXT: adc.w r2, r0, r0, asr #31
128+
; CHECK-NEXT: eor.w r0, r2, r0, asr #31
129+
; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
130+
; CHECK-NEXT: pop {r7, pc}
131+
%sextsrc1 = sext <2 x i32> %src1 to <2 x i64>
132+
%sextsrc2 = sext <2 x i32> %src2 to <2 x i64>
133+
%add1 = sub <2 x i64> %sextsrc1, %sextsrc2
134+
%add2 = sub <2 x i64> zeroinitializer, %add1
135+
%c = icmp sge <2 x i64> %add1, zeroinitializer
136+
%s = select <2 x i1> %c, <2 x i64> %add1, <2 x i64> %add2
137+
%result = trunc <2 x i64> %s to <2 x i32>
138+
ret <2 x i32> %result
139+
}
140+
141+
define arm_aapcs_vfpcc <16 x i8> @vabd_v16u8(<16 x i8> %src1, <16 x i8> %src2) {
142+
; CHECK-LABEL: vabd_v16u8:
51143
; CHECK: @ %bb.0:
52144
; CHECK-NEXT: vabd.u8 q0, q0, q1
53145
; CHECK-NEXT: bx lr
@@ -61,8 +153,45 @@ define arm_aapcs_vfpcc <16 x i8> @vabd_u8(<16 x i8> %src1, <16 x i8> %src2) {
61153
ret <16 x i8> %result
62154
}
63155

64-
define arm_aapcs_vfpcc <8 x i16> @vabd_u16(<8 x i16> %src1, <8 x i16> %src2) {
65-
; CHECK-LABEL: vabd_u16:
156+
; Unsigned absolute difference of two <8 x i8> vectors, expressed as:
; zext both operands to i16, subtract, pick x or (0 - x) via icmp sge/select,
; then truncate back to i8. The compare is signed (sge) on the difference of
; the zero-extended values.
; The CHECK lines expect the operands to be widened with vmovlb.u8 and the
; difference computed with vsub.i16 + vabs.s16, i.e. no vabd instruction.
; Note: %add1 holds the difference and %add2 its negation, despite the names.
define arm_aapcs_vfpcc <8 x i8> @vabd_v8u8(<8 x i8> %src1, <8 x i8> %src2) {
157+
; CHECK-LABEL: vabd_v8u8:
158+
; CHECK: @ %bb.0:
159+
; CHECK-NEXT: vmovlb.u8 q1, q1
160+
; CHECK-NEXT: vmovlb.u8 q0, q0
161+
; CHECK-NEXT: vsub.i16 q0, q0, q1
162+
; CHECK-NEXT: vabs.s16 q0, q0
163+
; CHECK-NEXT: bx lr
164+
%zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
165+
%zextsrc2 = zext <8 x i8> %src2 to <8 x i16>
166+
%add1 = sub <8 x i16> %zextsrc1, %zextsrc2
167+
%add2 = sub <8 x i16> zeroinitializer, %add1
168+
%c = icmp sge <8 x i16> %add1, zeroinitializer
169+
%s = select <8 x i1> %c, <8 x i16> %add1, <8 x i16> %add2
170+
%result = trunc <8 x i16> %s to <8 x i8>
171+
ret <8 x i8> %result
172+
}
173+
174+
; Unsigned absolute difference of two <4 x i8> vectors, expressed as:
; zext both operands to i16, subtract, pick x or (0 - x) via icmp sge/select,
; then truncate back to i8. The compare is signed (sge) on the difference of
; the zero-extended values.
; The CHECK lines expect the zero-extension to be done by masking both
; operands with a splat of 0xff in 32-bit lanes (vmov.i32 + vand), followed
; by vsub.i32 + vabs.s32; no vabd instruction is expected.
; Note: %add1 holds the difference and %add2 its negation, despite the names.
define arm_aapcs_vfpcc <4 x i8> @vabd_v4u8(<4 x i8> %src1, <4 x i8> %src2) {
175+
; CHECK-LABEL: vabd_v4u8:
176+
; CHECK: @ %bb.0:
177+
; CHECK-NEXT: vmov.i32 q2, #0xff
178+
; CHECK-NEXT: vand q1, q1, q2
179+
; CHECK-NEXT: vand q0, q0, q2
180+
; CHECK-NEXT: vsub.i32 q0, q0, q1
181+
; CHECK-NEXT: vabs.s32 q0, q0
182+
; CHECK-NEXT: bx lr
183+
%zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
184+
%zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
185+
%add1 = sub <4 x i16> %zextsrc1, %zextsrc2
186+
%add2 = sub <4 x i16> zeroinitializer, %add1
187+
%c = icmp sge <4 x i16> %add1, zeroinitializer
188+
%s = select <4 x i1> %c, <4 x i16> %add1, <4 x i16> %add2
189+
%result = trunc <4 x i16> %s to <4 x i8>
190+
ret <4 x i8> %result
191+
}
192+
193+
define arm_aapcs_vfpcc <8 x i16> @vabd_v8u16(<8 x i16> %src1, <8 x i16> %src2) {
194+
; CHECK-LABEL: vabd_v8u16:
66195
; CHECK: @ %bb.0:
67196
; CHECK-NEXT: vabd.u16 q0, q0, q1
68197
; CHECK-NEXT: bx lr
@@ -76,6 +205,24 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_u16(<8 x i16> %src1, <8 x i16> %src2) {
76205
ret <8 x i16> %result
77206
}
78207

208+
; Unsigned absolute difference of two <4 x i16> vectors, expressed as:
; zext both operands to i32, subtract, pick x or (0 - x) via icmp sge/select,
; then truncate back to i16. The compare is signed (sge) on the difference of
; the zero-extended values.
; The CHECK lines expect the operands to be widened with vmovlb.u16 and the
; difference computed with vsub.i32 + vabs.s32, i.e. no vabd instruction.
; Note: %add1 holds the difference and %add2 its negation, despite the names.
define arm_aapcs_vfpcc <4 x i16> @vabd_v4u16(<4 x i16> %src1, <4 x i16> %src2) {
209+
; CHECK-LABEL: vabd_v4u16:
210+
; CHECK: @ %bb.0:
211+
; CHECK-NEXT: vmovlb.u16 q1, q1
212+
; CHECK-NEXT: vmovlb.u16 q0, q0
213+
; CHECK-NEXT: vsub.i32 q0, q0, q1
214+
; CHECK-NEXT: vabs.s32 q0, q0
215+
; CHECK-NEXT: bx lr
216+
%zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
217+
%zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
218+
%add1 = sub <4 x i32> %zextsrc1, %zextsrc2
219+
%add2 = sub <4 x i32> zeroinitializer, %add1
220+
%c = icmp sge <4 x i32> %add1, zeroinitializer
221+
%s = select <4 x i1> %c, <4 x i32> %add1, <4 x i32> %add2
222+
%result = trunc <4 x i32> %s to <4 x i16>
223+
ret <4 x i16> %result
224+
}
225+
79226
define arm_aapcs_vfpcc <4 x i32> @vabd_u32(<4 x i32> %src1, <4 x i32> %src2) {
80227
; CHECK-LABEL: vabd_u32:
81228
; CHECK: @ %bb.0:
@@ -91,19 +238,71 @@ define arm_aapcs_vfpcc <4 x i32> @vabd_u32(<4 x i32> %src1, <4 x i32> %src2) {
91238
ret <4 x i32> %result
92239
}
93240

241+
; Unsigned absolute difference of two full-width <4 x i32> vectors, expressed
; as: zext both operands to i64, subtract, pick x or (0 - x) via icmp
; sge/select, then truncate back to i32.
; The CHECK lines expect the whole pattern to fold to a single vabd.u32.
; NOTE(review): the pre-existing @vabd_u32 test has the same signature and
; return type; this may be a duplicate of it -- confirm against its body.
; Note: %add1 holds the difference and %add2 its negation, despite the names.
define arm_aapcs_vfpcc <4 x i32> @vabd_v4u32(<4 x i32> %src1, <4 x i32> %src2) {
242+
; CHECK-LABEL: vabd_v4u32:
243+
; CHECK: @ %bb.0:
244+
; CHECK-NEXT: vabd.u32 q0, q0, q1
245+
; CHECK-NEXT: bx lr
246+
%zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
247+
%zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
248+
%add1 = sub <4 x i64> %zextsrc1, %zextsrc2
249+
%add2 = sub <4 x i64> zeroinitializer, %add1
250+
%c = icmp sge <4 x i64> %add1, zeroinitializer
251+
%s = select <4 x i1> %c, <4 x i64> %add1, <4 x i64> %add2
252+
%result = trunc <4 x i64> %s to <4 x i32>
253+
ret <4 x i32> %result
254+
}
255+
256+
; Unsigned absolute difference of two <2 x i32> vectors, expressed as:
; zext both operands to i64, subtract, pick x or (0 - x) via icmp sge/select,
; then truncate back to i32. The compare is signed (sge) on the difference of
; the zero-extended values.
; The CHECK lines expect the zero-extension to be done by masking with
; vmov.i64 #0xffffffff + vand, after which the arithmetic is scalar: each
; 64-bit lane is moved to a core-register pair (vmov rA, rB, dN), subtracted
; with subs/sbc, made absolute with adds/adc + eor against the sign
; (asr #31), and re-inserted with vmov q0[..], q0[..]. r4/lr are saved and
; restored around the sequence.
; Note: %add1 holds the difference and %add2 its negation, despite the names.
define arm_aapcs_vfpcc <2 x i32> @vabd_v2u32(<2 x i32> %src1, <2 x i32> %src2) {
257+
; CHECK-LABEL: vabd_v2u32:
258+
; CHECK: @ %bb.0:
259+
; CHECK-NEXT: .save {r4, lr}
260+
; CHECK-NEXT: push {r4, lr}
261+
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
262+
; CHECK-NEXT: vand q1, q1, q2
263+
; CHECK-NEXT: vand q0, q0, q2
264+
; CHECK-NEXT: vmov r0, r1, d3
265+
; CHECK-NEXT: vmov r2, r3, d1
266+
; CHECK-NEXT: subs r0, r2, r0
267+
; CHECK-NEXT: sbc.w r1, r3, r1
268+
; CHECK-NEXT: vmov r3, r2, d2
269+
; CHECK-NEXT: adds.w r0, r0, r1, asr #31
270+
; CHECK-NEXT: eor.w lr, r0, r1, asr #31
271+
; CHECK-NEXT: vmov r0, r4, d0
272+
; CHECK-NEXT: adc.w r12, r1, r1, asr #31
273+
; CHECK-NEXT: subs r0, r0, r3
274+
; CHECK-NEXT: sbc.w r2, r4, r2
275+
; CHECK-NEXT: adds.w r0, r0, r2, asr #31
276+
; CHECK-NEXT: eor.w r0, r0, r2, asr #31
277+
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
278+
; CHECK-NEXT: eor.w r0, r12, r1, asr #31
279+
; CHECK-NEXT: adc.w r1, r2, r2, asr #31
280+
; CHECK-NEXT: eor.w r1, r1, r2, asr #31
281+
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
282+
; CHECK-NEXT: pop {r4, pc}
283+
%zextsrc1 = zext <2 x i32> %src1 to <2 x i64>
284+
%zextsrc2 = zext <2 x i32> %src2 to <2 x i64>
285+
%add1 = sub <2 x i64> %zextsrc1, %zextsrc2
286+
%add2 = sub <2 x i64> zeroinitializer, %add1
287+
%c = icmp sge <2 x i64> %add1, zeroinitializer
288+
%s = select <2 x i1> %c, <2 x i64> %add1, <2 x i64> %add2
289+
%result = trunc <2 x i64> %s to <2 x i32>
290+
ret <2 x i32> %result
291+
}
292+
94293
define void @vabd_loop_s8(i8* nocapture readonly %x, i8* nocapture readonly %y, i8* noalias nocapture %z, i32 %n) {
95294
; CHECK-LABEL: vabd_loop_s8:
96295
; CHECK: @ %bb.0: @ %entry
97296
; CHECK-NEXT: .save {r7, lr}
98297
; CHECK-NEXT: push {r7, lr}
99298
; CHECK-NEXT: mov.w lr, #64
100-
; CHECK-NEXT: .LBB6_1: @ %vector.body
299+
; CHECK-NEXT: .LBB15_1: @ %vector.body
101300
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
102301
; CHECK-NEXT: vldrb.u8 q0, [r1], #16
103302
; CHECK-NEXT: vldrb.u8 q1, [r0], #16
104303
; CHECK-NEXT: vabd.s8 q0, q1, q0
105304
; CHECK-NEXT: vstrb.8 q0, [r2], #16
106-
; CHECK-NEXT: le lr, .LBB6_1
305+
; CHECK-NEXT: le lr, .LBB15_1
107306
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
108307
; CHECK-NEXT: pop {r7, pc}
109308
entry:
@@ -141,13 +340,13 @@ define void @vabd_loop_s16(i16* nocapture readonly %x, i16* nocapture readonly %
141340
; CHECK-NEXT: .save {r7, lr}
142341
; CHECK-NEXT: push {r7, lr}
143342
; CHECK-NEXT: mov.w lr, #128
144-
; CHECK-NEXT: .LBB7_1: @ %vector.body
343+
; CHECK-NEXT: .LBB16_1: @ %vector.body
145344
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
146345
; CHECK-NEXT: vldrh.u16 q0, [r1], #16
147346
; CHECK-NEXT: vldrh.u16 q1, [r0], #16
148347
; CHECK-NEXT: vabd.s16 q0, q1, q0
149348
; CHECK-NEXT: vstrb.8 q0, [r2], #16
150-
; CHECK-NEXT: le lr, .LBB7_1
349+
; CHECK-NEXT: le lr, .LBB16_1
151350
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
152351
; CHECK-NEXT: pop {r7, pc}
153352
entry:
@@ -186,7 +385,7 @@ define void @vabd_loop_s32(i32* nocapture readonly %x, i32* nocapture readonly %
186385
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
187386
; CHECK-NEXT: mov.w lr, #256
188387
; CHECK-NEXT: vmov.i32 q0, #0x0
189-
; CHECK-NEXT: .LBB8_1: @ %vector.body
388+
; CHECK-NEXT: .LBB17_1: @ %vector.body
190389
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
191390
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
192391
; CHECK-NEXT: vmov.f32 s8, s6
@@ -231,7 +430,7 @@ define void @vabd_loop_s32(i32* nocapture readonly %x, i32* nocapture readonly %
231430
; CHECK-NEXT: vpst
232431
; CHECK-NEXT: vsubt.i32 q2, q0, q2
233432
; CHECK-NEXT: vstrb.8 q2, [r2], #16
234-
; CHECK-NEXT: le lr, .LBB8_1
433+
; CHECK-NEXT: le lr, .LBB17_1
235434
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
236435
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
237436
entry:
@@ -269,13 +468,13 @@ define void @vabd_loop_u8(i8* nocapture readonly %x, i8* nocapture readonly %y,
269468
; CHECK-NEXT: .save {r7, lr}
270469
; CHECK-NEXT: push {r7, lr}
271470
; CHECK-NEXT: mov.w lr, #64
272-
; CHECK-NEXT: .LBB9_1: @ %vector.body
471+
; CHECK-NEXT: .LBB18_1: @ %vector.body
273472
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
274473
; CHECK-NEXT: vldrb.u8 q0, [r1], #16
275474
; CHECK-NEXT: vldrb.u8 q1, [r0], #16
276475
; CHECK-NEXT: vabd.u8 q0, q1, q0
277476
; CHECK-NEXT: vstrb.8 q0, [r2], #16
278-
; CHECK-NEXT: le lr, .LBB9_1
477+
; CHECK-NEXT: le lr, .LBB18_1
279478
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
280479
; CHECK-NEXT: pop {r7, pc}
281480
entry:
@@ -313,13 +512,13 @@ define void @vabd_loop_u16(i16* nocapture readonly %x, i16* nocapture readonly %
313512
; CHECK-NEXT: .save {r7, lr}
314513
; CHECK-NEXT: push {r7, lr}
315514
; CHECK-NEXT: mov.w lr, #128
316-
; CHECK-NEXT: .LBB10_1: @ %vector.body
515+
; CHECK-NEXT: .LBB19_1: @ %vector.body
317516
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
318517
; CHECK-NEXT: vldrh.u16 q0, [r1], #16
319518
; CHECK-NEXT: vldrh.u16 q1, [r0], #16
320519
; CHECK-NEXT: vabd.u16 q0, q1, q0
321520
; CHECK-NEXT: vstrb.8 q0, [r2], #16
322-
; CHECK-NEXT: le lr, .LBB10_1
521+
; CHECK-NEXT: le lr, .LBB19_1
323522
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
324523
; CHECK-NEXT: pop {r7, pc}
325524
entry:
@@ -361,7 +560,7 @@ define void @vabd_loop_u32(i32* nocapture readonly %x, i32* nocapture readonly %
361560
; CHECK-NEXT: mov.w lr, #256
362561
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
363562
; CHECK-NEXT: vmov.i32 q1, #0x0
364-
; CHECK-NEXT: .LBB11_1: @ %vector.body
563+
; CHECK-NEXT: .LBB20_1: @ %vector.body
365564
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
366565
; CHECK-NEXT: vldrw.u32 q4, [r1], #16
367566
; CHECK-NEXT: vldrw.u32 q5, [r0], #16
@@ -406,7 +605,7 @@ define void @vabd_loop_u32(i32* nocapture readonly %x, i32* nocapture readonly %
406605
; CHECK-NEXT: vpst
407606
; CHECK-NEXT: vsubt.i32 q4, q1, q4
408607
; CHECK-NEXT: vstrb.8 q4, [r2], #16
409-
; CHECK-NEXT: le lr, .LBB11_1
608+
; CHECK-NEXT: le lr, .LBB20_1
410609
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
411610
; CHECK-NEXT: vpop {d8, d9, d10, d11}
412611
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}

0 commit comments

Comments
 (0)