@@ -223,6 +223,72 @@ exit:
223223 ret i32 %res
224224}
225225
; Test: add-reduction loop whose accumulator is a vector (<4 x i32>).
;
; Input IR (bottom of this function): a single-block loop that walks %iv from 0,
; loads one <4 x i32> from %a at index %iv, adds it into %rdx (which starts at
; zeroinitializer), and leaves the loop when %iv.next == %n. The final sum is
; returned through the %res phi in %exit.
;
; Expected output (CHECK lines):
;   * The main loop advances the induction variable by 2 per trip and carries
;     two independent accumulator phis, RDX and RDX_1, both seeded with
;     zeroinitializer. Each accumulator is fed by its own load.
;   * In the unrolled loop's exit block the two partial sums are combined with
;     one vector add (BIN_RDX). RES_PH and RDX_UNR are matched there as well
;     but are not referenced by any later CHECK line; BIN_RDX is the value that
;     feeds both the epilogue and the final RES phi.
;   * The odd trip (XTRAITER = N & 1) is handled by a single epilogue iteration
;     that starts from BIN_RDX, or from zeroinitializer when the main loop is
;     skipped entirely.
;
; NOTE(review): the loop is tagged with !llvm.loop !0; the operands of !0 / !1
; and the RUN line are not visible from here - presumably they request the
; unroll-by-2 seen in the CHECK lines; confirm against the rest of the file.
; NOTE(review): the CHECK lines look autogenerated (TMPn / *_1 / *_EPIL names);
; presumably they should be regenerated with the update script rather than
; edited by hand - confirm against the file header.
226+ define <4 x i32 > @test_vector_add_reduction (ptr %a , i64 %n ) {
227+ ; CHECK-LABEL: define <4 x i32> @test_vector_add_reduction(
228+ ; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
229+ ; CHECK-NEXT: [[ENTRY:.*]]:
230+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
231+ ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
232+ ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
233+ ; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
234+ ; CHECK: [[ENTRY_NEW]]:
235+ ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
236+ ; CHECK-NEXT: br label %[[LOOP:.*]]
237+ ; CHECK: [[LOOP]]:
238+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
239+ ; CHECK-NEXT: [[RDX_1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
240+ ; CHECK-NEXT: [[RDX:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
241+ ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
242+ ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[A]], i64 [[IV]]
243+ ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[GEP_A]], align 16
244+ ; CHECK-NEXT: [[RDX_NEXT]] = add <4 x i32> [[RDX]], [[TMP2]]
245+ ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
246+ ; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[A]], i64 [[IV_NEXT]]
247+ ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[GEP_A_1]], align 16
248+ ; CHECK-NEXT: [[RDX_NEXT_1]] = add <4 x i32> [[RDX_1]], [[TMP3]]
249+ ; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
250+ ; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
251+ ; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
252+ ; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
253+ ; CHECK: [[EXIT_UNR_LCSSA]]:
254+ ; CHECK-NEXT: [[RES_PH:%.*]] = phi <4 x i32> [ [[RDX_NEXT_1]], %[[LOOP]] ]
255+ ; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
256+ ; CHECK-NEXT: [[RDX_UNR:%.*]] = phi <4 x i32> [ [[RDX_NEXT_1]], %[[LOOP]] ]
257+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[RDX_NEXT_1]], [[RDX_NEXT]]
258+ ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
259+ ; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
260+ ; CHECK: [[LOOP_EPIL_PREHEADER]]:
261+ ; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
262+ ; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ]
263+ ; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
264+ ; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
265+ ; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
266+ ; CHECK: [[LOOP_EPIL]]:
267+ ; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[A]], i64 [[IV_EPIL_INIT]]
268+ ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[GEP_A_EPIL]], align 16
269+ ; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = add <4 x i32> [[RDX_EPIL_INIT]], [[TMP4]]
270+ ; CHECK-NEXT: br label %[[EXIT]]
271+ ; CHECK: [[EXIT]]:
272+ ; CHECK-NEXT: [[RES:%.*]] = phi <4 x i32> [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
273+ ; CHECK-NEXT: ret <4 x i32> [[RES]]
274+ ;
275+ entry:
276+ br label %loop
277+
278+ loop:
279+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
280+ %rdx = phi <4 x i32 > [ zeroinitializer , %entry ], [ %rdx.next , %loop ]
281+ %gep.a = getelementptr inbounds nuw <4 x i32 >, ptr %a , i64 %iv
282+ %1 = load <4 x i32 >, ptr %gep.a , align 16
283+ %rdx.next = add <4 x i32 > %rdx , %1
284+ %iv.next = add nuw nsw i64 %iv , 1
285+ %ec = icmp eq i64 %iv.next , %n
286+ br i1 %ec , label %exit , label %loop , !llvm.loop !0
287+
288+ exit:
289+ %res = phi <4 x i32 > [ %rdx.next , %loop ]
290+ ret <4 x i32 > %res
291+ }
226292
227293
228294!0 = distinct !{!0 , !1 }
@@ -237,4 +303,5 @@ exit:
237303; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
238304; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
239305; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
306+ ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
240307;.
0 commit comments