@@ -497,49 +497,21 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
497497;
498498; TF-FIXEDLEN-LABEL: @conditional_uniform_load(
499499; TF-FIXEDLEN-NEXT: entry:
500- ; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
501- ; TF-FIXEDLEN: vector.ph:
502- ; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0
503- ; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
504- ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
505- ; TF-FIXEDLEN: vector.body:
506- ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
507- ; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
508- ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
509- ; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
510- ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], <i64 10, i64 10, i64 10, i64 10>
511- ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
512- ; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison)
513- ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP1]], <i1 true, i1 true, i1 true, i1 true>
514- ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
515- ; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP2]], [[TMP4]]
516- ; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer
517- ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
518- ; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
519- ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP7]], i32 8, <4 x i1> [[TMP6]])
520- ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
521- ; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
522- ; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
523- ; TF-FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
524- ; TF-FIXEDLEN: middle.block:
525- ; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
526- ; TF-FIXEDLEN: scalar.ph:
527- ; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
528500; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
529501; TF-FIXEDLEN: for.body:
530- ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]] , [[SCALAR_PH ]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
502+ ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 0 , [[ENTRY:%.* ]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
531503; TF-FIXEDLEN-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
532504; TF-FIXEDLEN-NEXT: br i1 [[CMP]], label [[DO_LOAD:%.*]], label [[LATCH]]
533505; TF-FIXEDLEN: do_load:
534- ; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
506+ ; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B:%.* ]], align 8
535507; TF-FIXEDLEN-NEXT: br label [[LATCH]]
536508; TF-FIXEDLEN: latch:
537509; TF-FIXEDLEN-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[V]], [[DO_LOAD]] ]
538- ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
510+ ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.* ]], i64 [[IV]]
539511; TF-FIXEDLEN-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
540512; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
541513; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
542- ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+ ]]
514+ ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.* ]], label [[FOR_BODY]]
543515; TF-FIXEDLEN: for.end:
544516; TF-FIXEDLEN-NEXT: ret void
545517;
@@ -708,7 +680,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
708680; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
709681; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
710682; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
711- ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6 :![0-9]+]]
683+ ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4 :![0-9]+]]
712684; TF-FIXEDLEN: middle.block:
713685; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
714686; TF-FIXEDLEN: scalar.ph:
@@ -721,7 +693,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
721693; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
722694; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
723695; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
724- ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7 :![0-9]+]]
696+ ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5 :![0-9]+]]
725697; TF-FIXEDLEN: for.end:
726698; TF-FIXEDLEN-NEXT: ret void
727699;
@@ -883,7 +855,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
883855; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
884856; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
885857; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
886- ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8 :![0-9]+]]
858+ ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6 :![0-9]+]]
887859; TF-FIXEDLEN: middle.block:
888860; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
889861; TF-FIXEDLEN: scalar.ph:
@@ -896,7 +868,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
896868; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
897869; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
898870; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
899- ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9 :![0-9]+]]
871+ ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7 :![0-9]+]]
900872; TF-FIXEDLEN: for.end:
901873; TF-FIXEDLEN-NEXT: ret void
902874;
@@ -1114,7 +1086,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
11141086; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
11151087; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
11161088; TF-FIXEDLEN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
1117- ; TF-FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10 :![0-9]+]]
1089+ ; TF-FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8 :![0-9]+]]
11181090; TF-FIXEDLEN: middle.block:
11191091; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
11201092; TF-FIXEDLEN: scalar.ph:
@@ -1127,7 +1099,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
11271099; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
11281100; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
11291101; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
1130- ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11 :![0-9]+]]
1102+ ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9 :![0-9]+]]
11311103; TF-FIXEDLEN: for.end:
11321104; TF-FIXEDLEN-NEXT: ret void
11331105;
@@ -1353,7 +1325,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
13531325; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
13541326; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
13551327; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
1356- ; TF-FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12 :![0-9]+]]
1328+ ; TF-FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10 :![0-9]+]]
13571329; TF-FIXEDLEN: middle.block:
13581330; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
13591331; TF-FIXEDLEN: scalar.ph:
@@ -1371,7 +1343,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
13711343; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
13721344; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
13731345; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
1374- ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13 :![0-9]+]]
1346+ ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11 :![0-9]+]]
13751347; TF-FIXEDLEN: for.end:
13761348; TF-FIXEDLEN-NEXT: ret void
13771349;
@@ -1539,7 +1511,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
15391511; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
15401512; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
15411513; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
1542- ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14 :![0-9]+]]
1514+ ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12 :![0-9]+]]
15431515; TF-FIXEDLEN: middle.block:
15441516; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
15451517; TF-FIXEDLEN: scalar.ph:
@@ -1552,7 +1524,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
15521524; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
15531525; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
15541526; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
1555- ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15 :![0-9]+]]
1527+ ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13 :![0-9]+]]
15561528; TF-FIXEDLEN: for.end:
15571529; TF-FIXEDLEN-NEXT: ret void
15581530;
0 commit comments