11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve %s -o - | FileCheck %s
33
4- define arm_aapcs_vfpcc <16 x i8 > @vabd_s8 (<16 x i8 > %src1 , <16 x i8 > %src2 ) {
5- ; CHECK-LABEL: vabd_s8 :
4+ define arm_aapcs_vfpcc <16 x i8 > @vabd_v16s8 (<16 x i8 > %src1 , <16 x i8 > %src2 ) {
5+ ; CHECK-LABEL: vabd_v16s8 :
66; CHECK: @ %bb.0:
77; CHECK-NEXT: vabd.s8 q0, q0, q1
88; CHECK-NEXT: bx lr
@@ -16,8 +16,46 @@ define arm_aapcs_vfpcc <16 x i8> @vabd_s8(<16 x i8> %src1, <16 x i8> %src2) {
1616 ret <16 x i8 > %result
1717}
1818
19- define arm_aapcs_vfpcc <8 x i16 > @vabd_s16 (<8 x i16 > %src1 , <8 x i16 > %src2 ) {
20- ; CHECK-LABEL: vabd_s16:
; Signed absolute difference on a <8 x i8> vector: both operands are
; sign-extended to <8 x i16>, subtracted, the non-negative one of the
; difference and its negation is selected, and the result is truncated
; back to <8 x i8>.
; The CHECK lines expect the expanded sequence (vmovlb.s8 widening, vsub.i16,
; vabs.s16); no vabd instruction is expected for this vector type.
; NOTE: despite their names, %add1 is the difference and %add2 its negation.
19+ define arm_aapcs_vfpcc <8 x i8 > @vabd_v8s8 (<8 x i8 > %src1 , <8 x i8 > %src2 ) {
20+ ; CHECK-LABEL: vabd_v8s8:
21+ ; CHECK: @ %bb.0:
22+ ; CHECK-NEXT: vmovlb.s8 q1, q1
23+ ; CHECK-NEXT: vmovlb.s8 q0, q0
24+ ; CHECK-NEXT: vsub.i16 q0, q0, q1
25+ ; CHECK-NEXT: vabs.s16 q0, q0
26+ ; CHECK-NEXT: bx lr
27+ %sextsrc1 = sext <8 x i8 > %src1 to <8 x i16 >
28+ %sextsrc2 = sext <8 x i8 > %src2 to <8 x i16 >
29+ %add1 = sub <8 x i16 > %sextsrc1 , %sextsrc2
30+ %add2 = sub <8 x i16 > zeroinitializer , %add1
31+ %c = icmp sge <8 x i16 > %add1 , zeroinitializer
32+ %s = select <8 x i1 > %c , <8 x i16 > %add1 , <8 x i16 > %add2
33+ %result = trunc <8 x i16 > %s to <8 x i8 >
34+ ret <8 x i8 > %result
35+ }
36+
; Signed absolute difference on a <4 x i8> vector: sign-extend both operands
; to <4 x i16>, subtract, select the non-negative one of the difference and
; its negation, then truncate back to <4 x i8>.
; The CHECK lines expect two widening steps per operand (vmovlb.s8 then
; vmovlb.s16) followed by vsub.i32 / vabs.s32, i.e. the arithmetic is
; expected on 32-bit lanes even though the IR works in i16.
; NOTE(review): presumably the 4-element vector is legalized to 32-bit lanes
; of a q register — confirm against the MVE type legalization.
; NOTE: despite their names, %add1 is the difference and %add2 its negation.
37+ define arm_aapcs_vfpcc <4 x i8 > @vabd_v4s8 (<4 x i8 > %src1 , <4 x i8 > %src2 ) {
38+ ; CHECK-LABEL: vabd_v4s8:
39+ ; CHECK: @ %bb.0:
40+ ; CHECK-NEXT: vmovlb.s8 q1, q1
41+ ; CHECK-NEXT: vmovlb.s8 q0, q0
42+ ; CHECK-NEXT: vmovlb.s16 q1, q1
43+ ; CHECK-NEXT: vmovlb.s16 q0, q0
44+ ; CHECK-NEXT: vsub.i32 q0, q0, q1
45+ ; CHECK-NEXT: vabs.s32 q0, q0
46+ ; CHECK-NEXT: bx lr
47+ %sextsrc1 = sext <4 x i8 > %src1 to <4 x i16 >
48+ %sextsrc2 = sext <4 x i8 > %src2 to <4 x i16 >
49+ %add1 = sub <4 x i16 > %sextsrc1 , %sextsrc2
50+ %add2 = sub <4 x i16 > zeroinitializer , %add1
51+ %c = icmp sge <4 x i16 > %add1 , zeroinitializer
52+ %s = select <4 x i1 > %c , <4 x i16 > %add1 , <4 x i16 > %add2
53+ %result = trunc <4 x i16 > %s to <4 x i8 >
54+ ret <4 x i8 > %result
55+ }
56+
57+ define arm_aapcs_vfpcc <8 x i16 > @vabd_v8s16 (<8 x i16 > %src1 , <8 x i16 > %src2 ) {
58+ ; CHECK-LABEL: vabd_v8s16:
2159; CHECK: @ %bb.0:
2260; CHECK-NEXT: vabd.s16 q0, q0, q1
2361; CHECK-NEXT: bx lr
@@ -31,8 +69,26 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_s16(<8 x i16> %src1, <8 x i16> %src2) {
3169 ret <8 x i16 > %result
3270}
3371
34- define arm_aapcs_vfpcc <4 x i32 > @vabd_s32 (<4 x i32 > %src1 , <4 x i32 > %src2 ) {
35- ; CHECK-LABEL: vabd_s32:
; Signed absolute difference on a <4 x i16> vector: sign-extend both operands
; to <4 x i32>, subtract, select the non-negative one of the difference and
; its negation, then truncate back to <4 x i16>.
; The CHECK lines expect the expanded sequence (vmovlb.s16 widening,
; vsub.i32, vabs.s32); no vabd instruction is expected for this vector type.
; NOTE: despite their names, %add1 is the difference and %add2 its negation.
72+ define arm_aapcs_vfpcc <4 x i16 > @vabd_v4s16 (<4 x i16 > %src1 , <4 x i16 > %src2 ) {
73+ ; CHECK-LABEL: vabd_v4s16:
74+ ; CHECK: @ %bb.0:
75+ ; CHECK-NEXT: vmovlb.s16 q1, q1
76+ ; CHECK-NEXT: vmovlb.s16 q0, q0
77+ ; CHECK-NEXT: vsub.i32 q0, q0, q1
78+ ; CHECK-NEXT: vabs.s32 q0, q0
79+ ; CHECK-NEXT: bx lr
80+ %sextsrc1 = sext <4 x i16 > %src1 to <4 x i32 >
81+ %sextsrc2 = sext <4 x i16 > %src2 to <4 x i32 >
82+ %add1 = sub <4 x i32 > %sextsrc1 , %sextsrc2
83+ %add2 = sub <4 x i32 > zeroinitializer , %add1
84+ %c = icmp sge <4 x i32 > %add1 , zeroinitializer
85+ %s = select <4 x i1 > %c , <4 x i32 > %add1 , <4 x i32 > %add2
86+ %result = trunc <4 x i32 > %s to <4 x i16 >
87+ ret <4 x i16 > %result
88+ }
89+
90+ define arm_aapcs_vfpcc <4 x i32 > @vabd_v4s32 (<4 x i32 > %src1 , <4 x i32 > %src2 ) {
91+ ; CHECK-LABEL: vabd_v4s32:
3692; CHECK: @ %bb.0:
3793; CHECK-NEXT: vabd.s32 q0, q0, q1
3894; CHECK-NEXT: bx lr
@@ -46,8 +102,44 @@ define arm_aapcs_vfpcc <4 x i32> @vabd_s32(<4 x i32> %src1, <4 x i32> %src2) {
46102 ret <4 x i32 > %result
47103}
48104
49- define arm_aapcs_vfpcc <16 x i8 > @vabd_u8 (<16 x i8 > %src1 , <16 x i8 > %src2 ) {
50- ; CHECK-LABEL: vabd_u8:
; Signed absolute difference on a <2 x i32> vector: sign-extend both operands
; to <2 x i64>, subtract, select the non-negative one of the difference and
; its negation, then truncate back to <2 x i32>.
; The CHECK lines expect no vabd here. Lanes are moved out of q0/q1 into
; general-purpose registers (vmov rN, sM), each 64-bit difference is formed
; with subs/sbc, the absolute value is taken with an adds/adc + eor sequence
; using the high word shifted by asr #31, and the results are moved back
; into q0 with two-lane vmov instructions. r7/lr are saved and restored
; around the body.
; NOTE: despite their names, %add1 is the difference and %add2 its negation.
105+ define arm_aapcs_vfpcc <2 x i32 > @vabd_v2s32 (<2 x i32 > %src1 , <2 x i32 > %src2 ) {
106+ ; CHECK-LABEL: vabd_v2s32:
107+ ; CHECK: @ %bb.0:
108+ ; CHECK-NEXT: .save {r7, lr}
109+ ; CHECK-NEXT: push {r7, lr}
110+ ; CHECK-NEXT: vmov r0, s2
111+ ; CHECK-NEXT: vmov r2, s6
112+ ; CHECK-NEXT: vmov r3, s0
113+ ; CHECK-NEXT: asrs r1, r0, #31
114+ ; CHECK-NEXT: subs r0, r0, r2
115+ ; CHECK-NEXT: sbc.w r1, r1, r2, asr #31
116+ ; CHECK-NEXT: asrs r2, r3, #31
117+ ; CHECK-NEXT: adds.w r0, r0, r1, asr #31
118+ ; CHECK-NEXT: eor.w lr, r0, r1, asr #31
119+ ; CHECK-NEXT: vmov r0, s4
120+ ; CHECK-NEXT: adc.w r12, r1, r1, asr #31
121+ ; CHECK-NEXT: eor.w r1, r12, r1, asr #31
122+ ; CHECK-NEXT: subs r3, r3, r0
123+ ; CHECK-NEXT: sbc.w r0, r2, r0, asr #31
124+ ; CHECK-NEXT: adds.w r2, r3, r0, asr #31
125+ ; CHECK-NEXT: eor.w r2, r2, r0, asr #31
126+ ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
127+ ; CHECK-NEXT: adc.w r2, r0, r0, asr #31
128+ ; CHECK-NEXT: eor.w r0, r2, r0, asr #31
129+ ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
130+ ; CHECK-NEXT: pop {r7, pc}
131+ %sextsrc1 = sext <2 x i32 > %src1 to <2 x i64 >
132+ %sextsrc2 = sext <2 x i32 > %src2 to <2 x i64 >
133+ %add1 = sub <2 x i64 > %sextsrc1 , %sextsrc2
134+ %add2 = sub <2 x i64 > zeroinitializer , %add1
135+ %c = icmp sge <2 x i64 > %add1 , zeroinitializer
136+ %s = select <2 x i1 > %c , <2 x i64 > %add1 , <2 x i64 > %add2
137+ %result = trunc <2 x i64 > %s to <2 x i32 >
138+ ret <2 x i32 > %result
139+ }
140+
141+ define arm_aapcs_vfpcc <16 x i8 > @vabd_v16u8 (<16 x i8 > %src1 , <16 x i8 > %src2 ) {
142+ ; CHECK-LABEL: vabd_v16u8:
51143; CHECK: @ %bb.0:
52144; CHECK-NEXT: vabd.u8 q0, q0, q1
53145; CHECK-NEXT: bx lr
@@ -61,8 +153,45 @@ define arm_aapcs_vfpcc <16 x i8> @vabd_u8(<16 x i8> %src1, <16 x i8> %src2) {
61153 ret <16 x i8 > %result
62154}
63155
64- define arm_aapcs_vfpcc <8 x i16 > @vabd_u16 (<8 x i16 > %src1 , <8 x i16 > %src2 ) {
65- ; CHECK-LABEL: vabd_u16:
; Unsigned absolute difference on a <8 x i8> vector: zero-extend both
; operands to <8 x i16>, subtract, select the non-negative one of the
; difference and its negation (signed compare on the widened value), then
; truncate back to <8 x i8>.
; The CHECK lines expect the expanded sequence (vmovlb.u8 widening,
; vsub.i16, vabs.s16); no vabd instruction is expected for this vector type.
; NOTE: despite their names, %add1 is the difference and %add2 its negation.
156+ define arm_aapcs_vfpcc <8 x i8 > @vabd_v8u8 (<8 x i8 > %src1 , <8 x i8 > %src2 ) {
157+ ; CHECK-LABEL: vabd_v8u8:
158+ ; CHECK: @ %bb.0:
159+ ; CHECK-NEXT: vmovlb.u8 q1, q1
160+ ; CHECK-NEXT: vmovlb.u8 q0, q0
161+ ; CHECK-NEXT: vsub.i16 q0, q0, q1
162+ ; CHECK-NEXT: vabs.s16 q0, q0
163+ ; CHECK-NEXT: bx lr
164+ %zextsrc1 = zext <8 x i8 > %src1 to <8 x i16 >
165+ %zextsrc2 = zext <8 x i8 > %src2 to <8 x i16 >
166+ %add1 = sub <8 x i16 > %zextsrc1 , %zextsrc2
167+ %add2 = sub <8 x i16 > zeroinitializer , %add1
168+ %c = icmp sge <8 x i16 > %add1 , zeroinitializer
169+ %s = select <8 x i1 > %c , <8 x i16 > %add1 , <8 x i16 > %add2
170+ %result = trunc <8 x i16 > %s to <8 x i8 >
171+ ret <8 x i8 > %result
172+ }
173+
; Unsigned absolute difference on a <4 x i8> vector: zero-extend both
; operands to <4 x i16>, subtract, select the non-negative one of the
; difference and its negation, then truncate back to <4 x i8>.
; The CHECK lines expect the zero-extension to be done by masking each
; 32-bit lane with 0xff (vmov.i32 + vand) and the arithmetic to be done with
; vsub.i32 / vabs.s32; no vabd instruction is expected.
; NOTE(review): presumably the 4-element vector is legalized to 32-bit lanes
; of a q register — confirm against the MVE type legalization.
; NOTE: despite their names, %add1 is the difference and %add2 its negation.
174+ define arm_aapcs_vfpcc <4 x i8 > @vabd_v4u8 (<4 x i8 > %src1 , <4 x i8 > %src2 ) {
175+ ; CHECK-LABEL: vabd_v4u8:
176+ ; CHECK: @ %bb.0:
177+ ; CHECK-NEXT: vmov.i32 q2, #0xff
178+ ; CHECK-NEXT: vand q1, q1, q2
179+ ; CHECK-NEXT: vand q0, q0, q2
180+ ; CHECK-NEXT: vsub.i32 q0, q0, q1
181+ ; CHECK-NEXT: vabs.s32 q0, q0
182+ ; CHECK-NEXT: bx lr
183+ %zextsrc1 = zext <4 x i8 > %src1 to <4 x i16 >
184+ %zextsrc2 = zext <4 x i8 > %src2 to <4 x i16 >
185+ %add1 = sub <4 x i16 > %zextsrc1 , %zextsrc2
186+ %add2 = sub <4 x i16 > zeroinitializer , %add1
187+ %c = icmp sge <4 x i16 > %add1 , zeroinitializer
188+ %s = select <4 x i1 > %c , <4 x i16 > %add1 , <4 x i16 > %add2
189+ %result = trunc <4 x i16 > %s to <4 x i8 >
190+ ret <4 x i8 > %result
191+ }
192+
193+ define arm_aapcs_vfpcc <8 x i16 > @vabd_v8u16 (<8 x i16 > %src1 , <8 x i16 > %src2 ) {
194+ ; CHECK-LABEL: vabd_v8u16:
66195; CHECK: @ %bb.0:
67196; CHECK-NEXT: vabd.u16 q0, q0, q1
68197; CHECK-NEXT: bx lr
@@ -76,6 +205,24 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_u16(<8 x i16> %src1, <8 x i16> %src2) {
76205 ret <8 x i16 > %result
77206}
78207
; Unsigned absolute difference on a <4 x i16> vector: zero-extend both
; operands to <4 x i32>, subtract, select the non-negative one of the
; difference and its negation, then truncate back to <4 x i16>.
; The CHECK lines expect the expanded sequence (vmovlb.u16 widening,
; vsub.i32, vabs.s32); no vabd instruction is expected for this vector type.
; NOTE: despite their names, %add1 is the difference and %add2 its negation.
208+ define arm_aapcs_vfpcc <4 x i16 > @vabd_v4u16 (<4 x i16 > %src1 , <4 x i16 > %src2 ) {
209+ ; CHECK-LABEL: vabd_v4u16:
210+ ; CHECK: @ %bb.0:
211+ ; CHECK-NEXT: vmovlb.u16 q1, q1
212+ ; CHECK-NEXT: vmovlb.u16 q0, q0
213+ ; CHECK-NEXT: vsub.i32 q0, q0, q1
214+ ; CHECK-NEXT: vabs.s32 q0, q0
215+ ; CHECK-NEXT: bx lr
216+ %zextsrc1 = zext <4 x i16 > %src1 to <4 x i32 >
217+ %zextsrc2 = zext <4 x i16 > %src2 to <4 x i32 >
218+ %add1 = sub <4 x i32 > %zextsrc1 , %zextsrc2
219+ %add2 = sub <4 x i32 > zeroinitializer , %add1
220+ %c = icmp sge <4 x i32 > %add1 , zeroinitializer
221+ %s = select <4 x i1 > %c , <4 x i32 > %add1 , <4 x i32 > %add2
222+ %result = trunc <4 x i32 > %s to <4 x i16 >
223+ ret <4 x i16 > %result
224+ }
225+
79226define arm_aapcs_vfpcc <4 x i32 > @vabd_u32 (<4 x i32 > %src1 , <4 x i32 > %src2 ) {
80227; CHECK-LABEL: vabd_u32:
81228; CHECK: @ %bb.0:
@@ -91,19 +238,71 @@ define arm_aapcs_vfpcc <4 x i32> @vabd_u32(<4 x i32> %src1, <4 x i32> %src2) {
91238 ret <4 x i32 > %result
92239}
93240
; Unsigned absolute difference on a full-width <4 x i32> vector: zero-extend
; both operands to <4 x i64>, subtract, select the non-negative one of the
; difference and its negation, then truncate back to <4 x i32>.
; The CHECK lines expect the whole pattern to be emitted as a single
; vabd.u32 on q registers.
; NOTE: despite their names, %add1 is the difference and %add2 its negation.
241+ define arm_aapcs_vfpcc <4 x i32 > @vabd_v4u32 (<4 x i32 > %src1 , <4 x i32 > %src2 ) {
242+ ; CHECK-LABEL: vabd_v4u32:
243+ ; CHECK: @ %bb.0:
244+ ; CHECK-NEXT: vabd.u32 q0, q0, q1
245+ ; CHECK-NEXT: bx lr
246+ %zextsrc1 = zext <4 x i32 > %src1 to <4 x i64 >
247+ %zextsrc2 = zext <4 x i32 > %src2 to <4 x i64 >
248+ %add1 = sub <4 x i64 > %zextsrc1 , %zextsrc2
249+ %add2 = sub <4 x i64 > zeroinitializer , %add1
250+ %c = icmp sge <4 x i64 > %add1 , zeroinitializer
251+ %s = select <4 x i1 > %c , <4 x i64 > %add1 , <4 x i64 > %add2
252+ %result = trunc <4 x i64 > %s to <4 x i32 >
253+ ret <4 x i32 > %result
254+ }
255+
; Unsigned absolute difference on a <2 x i32> vector: zero-extend both
; operands to <2 x i64>, subtract, select the non-negative one of the
; difference and its negation, then truncate back to <2 x i32>.
; The CHECK lines expect no vabd here. Both inputs are masked with
; 0xffffffff per 64-bit lane (vmov.i64 + vand), each lane is moved into a
; pair of general-purpose registers, the 64-bit difference is formed with
; subs/sbc, the absolute value is taken with an adds/adc + eor sequence
; using the high word shifted by asr #31, and the results are moved back
; into q0 with two-lane vmov instructions. r4/lr are saved and restored
; around the body.
; NOTE: despite their names, %add1 is the difference and %add2 its negation.
256+ define arm_aapcs_vfpcc <2 x i32 > @vabd_v2u32 (<2 x i32 > %src1 , <2 x i32 > %src2 ) {
257+ ; CHECK-LABEL: vabd_v2u32:
258+ ; CHECK: @ %bb.0:
259+ ; CHECK-NEXT: .save {r4, lr}
260+ ; CHECK-NEXT: push {r4, lr}
261+ ; CHECK-NEXT: vmov.i64 q2, #0xffffffff
262+ ; CHECK-NEXT: vand q1, q1, q2
263+ ; CHECK-NEXT: vand q0, q0, q2
264+ ; CHECK-NEXT: vmov r0, r1, d3
265+ ; CHECK-NEXT: vmov r2, r3, d1
266+ ; CHECK-NEXT: subs r0, r2, r0
267+ ; CHECK-NEXT: sbc.w r1, r3, r1
268+ ; CHECK-NEXT: vmov r3, r2, d2
269+ ; CHECK-NEXT: adds.w r0, r0, r1, asr #31
270+ ; CHECK-NEXT: eor.w lr, r0, r1, asr #31
271+ ; CHECK-NEXT: vmov r0, r4, d0
272+ ; CHECK-NEXT: adc.w r12, r1, r1, asr #31
273+ ; CHECK-NEXT: subs r0, r0, r3
274+ ; CHECK-NEXT: sbc.w r2, r4, r2
275+ ; CHECK-NEXT: adds.w r0, r0, r2, asr #31
276+ ; CHECK-NEXT: eor.w r0, r0, r2, asr #31
277+ ; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
278+ ; CHECK-NEXT: eor.w r0, r12, r1, asr #31
279+ ; CHECK-NEXT: adc.w r1, r2, r2, asr #31
280+ ; CHECK-NEXT: eor.w r1, r1, r2, asr #31
281+ ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
282+ ; CHECK-NEXT: pop {r4, pc}
283+ %zextsrc1 = zext <2 x i32 > %src1 to <2 x i64 >
284+ %zextsrc2 = zext <2 x i32 > %src2 to <2 x i64 >
285+ %add1 = sub <2 x i64 > %zextsrc1 , %zextsrc2
286+ %add2 = sub <2 x i64 > zeroinitializer , %add1
287+ %c = icmp sge <2 x i64 > %add1 , zeroinitializer
288+ %s = select <2 x i1 > %c , <2 x i64 > %add1 , <2 x i64 > %add2
289+ %result = trunc <2 x i64 > %s to <2 x i32 >
290+ ret <2 x i32 > %result
291+ }
292+
94293define void @vabd_loop_s8 (i8* nocapture readonly %x , i8* nocapture readonly %y , i8* noalias nocapture %z , i32 %n ) {
95294; CHECK-LABEL: vabd_loop_s8:
96295; CHECK: @ %bb.0: @ %entry
97296; CHECK-NEXT: .save {r7, lr}
98297; CHECK-NEXT: push {r7, lr}
99298; CHECK-NEXT: mov.w lr, #64
100- ; CHECK-NEXT: .LBB6_1 : @ %vector.body
299+ ; CHECK-NEXT: .LBB15_1 : @ %vector.body
101300; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
102301; CHECK-NEXT: vldrb.u8 q0, [r1], #16
103302; CHECK-NEXT: vldrb.u8 q1, [r0], #16
104303; CHECK-NEXT: vabd.s8 q0, q1, q0
105304; CHECK-NEXT: vstrb.8 q0, [r2], #16
106- ; CHECK-NEXT: le lr, .LBB6_1
305+ ; CHECK-NEXT: le lr, .LBB15_1
107306; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
108307; CHECK-NEXT: pop {r7, pc}
109308entry:
@@ -141,13 +340,13 @@ define void @vabd_loop_s16(i16* nocapture readonly %x, i16* nocapture readonly %
141340; CHECK-NEXT: .save {r7, lr}
142341; CHECK-NEXT: push {r7, lr}
143342; CHECK-NEXT: mov.w lr, #128
144- ; CHECK-NEXT: .LBB7_1 : @ %vector.body
343+ ; CHECK-NEXT: .LBB16_1 : @ %vector.body
145344; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
146345; CHECK-NEXT: vldrh.u16 q0, [r1], #16
147346; CHECK-NEXT: vldrh.u16 q1, [r0], #16
148347; CHECK-NEXT: vabd.s16 q0, q1, q0
149348; CHECK-NEXT: vstrb.8 q0, [r2], #16
150- ; CHECK-NEXT: le lr, .LBB7_1
349+ ; CHECK-NEXT: le lr, .LBB16_1
151350; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
152351; CHECK-NEXT: pop {r7, pc}
153352entry:
@@ -186,7 +385,7 @@ define void @vabd_loop_s32(i32* nocapture readonly %x, i32* nocapture readonly %
186385; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
187386; CHECK-NEXT: mov.w lr, #256
188387; CHECK-NEXT: vmov.i32 q0, #0x0
189- ; CHECK-NEXT: .LBB8_1 : @ %vector.body
388+ ; CHECK-NEXT: .LBB17_1 : @ %vector.body
190389; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
191390; CHECK-NEXT: vldrw.u32 q1, [r0], #16
192391; CHECK-NEXT: vmov.f32 s8, s6
@@ -231,7 +430,7 @@ define void @vabd_loop_s32(i32* nocapture readonly %x, i32* nocapture readonly %
231430; CHECK-NEXT: vpst
232431; CHECK-NEXT: vsubt.i32 q2, q0, q2
233432; CHECK-NEXT: vstrb.8 q2, [r2], #16
234- ; CHECK-NEXT: le lr, .LBB8_1
433+ ; CHECK-NEXT: le lr, .LBB17_1
235434; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
236435; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
237436entry:
@@ -269,13 +468,13 @@ define void @vabd_loop_u8(i8* nocapture readonly %x, i8* nocapture readonly %y,
269468; CHECK-NEXT: .save {r7, lr}
270469; CHECK-NEXT: push {r7, lr}
271470; CHECK-NEXT: mov.w lr, #64
272- ; CHECK-NEXT: .LBB9_1 : @ %vector.body
471+ ; CHECK-NEXT: .LBB18_1 : @ %vector.body
273472; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
274473; CHECK-NEXT: vldrb.u8 q0, [r1], #16
275474; CHECK-NEXT: vldrb.u8 q1, [r0], #16
276475; CHECK-NEXT: vabd.u8 q0, q1, q0
277476; CHECK-NEXT: vstrb.8 q0, [r2], #16
278- ; CHECK-NEXT: le lr, .LBB9_1
477+ ; CHECK-NEXT: le lr, .LBB18_1
279478; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
280479; CHECK-NEXT: pop {r7, pc}
281480entry:
@@ -313,13 +512,13 @@ define void @vabd_loop_u16(i16* nocapture readonly %x, i16* nocapture readonly %
313512; CHECK-NEXT: .save {r7, lr}
314513; CHECK-NEXT: push {r7, lr}
315514; CHECK-NEXT: mov.w lr, #128
316- ; CHECK-NEXT: .LBB10_1 : @ %vector.body
515+ ; CHECK-NEXT: .LBB19_1 : @ %vector.body
317516; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
318517; CHECK-NEXT: vldrh.u16 q0, [r1], #16
319518; CHECK-NEXT: vldrh.u16 q1, [r0], #16
320519; CHECK-NEXT: vabd.u16 q0, q1, q0
321520; CHECK-NEXT: vstrb.8 q0, [r2], #16
322- ; CHECK-NEXT: le lr, .LBB10_1
521+ ; CHECK-NEXT: le lr, .LBB19_1
323522; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
324523; CHECK-NEXT: pop {r7, pc}
325524entry:
@@ -361,7 +560,7 @@ define void @vabd_loop_u32(i32* nocapture readonly %x, i32* nocapture readonly %
361560; CHECK-NEXT: mov.w lr, #256
362561; CHECK-NEXT: vmov.i64 q0, #0xffffffff
363562; CHECK-NEXT: vmov.i32 q1, #0x0
364- ; CHECK-NEXT: .LBB11_1 : @ %vector.body
563+ ; CHECK-NEXT: .LBB20_1 : @ %vector.body
365564; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
366565; CHECK-NEXT: vldrw.u32 q4, [r1], #16
367566; CHECK-NEXT: vldrw.u32 q5, [r0], #16
@@ -406,7 +605,7 @@ define void @vabd_loop_u32(i32* nocapture readonly %x, i32* nocapture readonly %
406605; CHECK-NEXT: vpst
407606; CHECK-NEXT: vsubt.i32 q4, q1, q4
408607; CHECK-NEXT: vstrb.8 q4, [r2], #16
409- ; CHECK-NEXT: le lr, .LBB11_1
608+ ; CHECK-NEXT: le lr, .LBB20_1
410609; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
411610; CHECK-NEXT: vpop {d8, d9, d10, d11}
412611; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
0 commit comments