Skip to content

Commit 49605a4

Browse files
committed
[LV] Set correct costs for interleave group members.
This ensures each scalarized member has an accurate cost, matching the cost it would have had if it had not been considered for an interleave group.
1 parent 398f6c2 commit 49605a4

File tree

2 files changed

+42
-191
lines changed

2 files changed

+42
-191
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5674,9 +5674,18 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
56745674
// If the instruction belongs to an interleave group, the whole group
56755675
// receives the same decision. The whole group receives the cost, but
56765676
// the cost will actually be assigned to one instruction.
5677-
if (const auto *Group = getInterleavedAccessGroup(&I))
5678-
setWideningDecision(Group, VF, Decision, Cost);
5679-
else
5677+
if (const auto *Group = getInterleavedAccessGroup(&I)) {
5678+
if (Decision == CM_Scalarize) {
5679+
for (unsigned Idx = 0; Idx < Group->getFactor(); ++Idx) {
5680+
if (auto *I = Group->getMember(Idx)) {
5681+
setWideningDecision(I, VF, Decision,
5682+
getMemInstScalarizationCost(I, VF));
5683+
}
5684+
}
5685+
} else {
5686+
setWideningDecision(Group, VF, Decision, Cost);
5687+
}
5688+
} else
56805689
setWideningDecision(&I, VF, Decision, Cost);
56815690
}
56825691
}

llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll

Lines changed: 30 additions & 188 deletions
Original file line numberDiff line numberDiff line change
@@ -328,211 +328,53 @@ declare void @init(ptr)
328328
define void @scalar_store_cost_after_discarding_interleave_group(ptr %dst, i32 %x, ptr %src) {
329329
; CHECK-LABEL: define void @scalar_store_cost_after_discarding_interleave_group(
330330
; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], ptr [[SRC:%.*]]) {
331-
; CHECK-NEXT: [[ENTRY:.*:]]
331+
; CHECK-NEXT: [[ENTRY:.*]]:
332332
; CHECK-NEXT: [[TEMP1:%.*]] = alloca [64 x i32], align 4
333333
; CHECK-NEXT: call void @init(ptr [[TEMP1]])
334-
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
335-
; CHECK: [[VECTOR_PH]]:
336-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[X]], i64 0
337-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
338-
; CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
339-
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i32> [[BROADCAST_SPLAT]], splat (i32 -171254)
340-
; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i32> [[TMP1]], splat (i32 1)
341-
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP0]], [[TMP2]]
342-
; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i32> [[TMP3]], splat (i32 1)
343-
; CHECK-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[TMP4]], splat (i32 1)
344-
; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i32> [[TMP5]] to <8 x i16>
345-
; CHECK-NEXT: [[TMP7:%.*]] = sub <8 x i32> zeroinitializer, [[TMP1]]
346-
; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i32> [[TMP7]], splat (i32 1)
347-
; CHECK-NEXT: [[TMP9:%.*]] = trunc <8 x i32> [[TMP8]] to <8 x i16>
348-
; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
349-
; CHECK-NEXT: [[TMP11:%.*]] = add <8 x i32> [[TMP10]], splat (i32 1)
350-
; CHECK-NEXT: [[TMP12:%.*]] = lshr <8 x i32> [[TMP11]], splat (i32 1)
351-
; CHECK-NEXT: [[TMP13:%.*]] = trunc <8 x i32> [[TMP12]] to <8 x i16>
352-
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
353-
; CHECK: [[VECTOR_BODY]]:
354-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
355-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
356-
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 0
357-
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 8
358-
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 16
359-
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 24
360-
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 32
361-
; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[OFFSET_IDX]], 40
362-
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[OFFSET_IDX]], 48
363-
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 56
334+
; CHECK-NEXT: br label %[[LOOP:.*]]
335+
; CHECK: [[LOOP]]:
336+
; CHECK-NEXT: [[TMP21:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
364337
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TEMP1]], align 4
365-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[TMP22]], i64 0
366-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer
367-
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP14]]
368-
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP15]]
369-
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP16]]
370-
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP17]]
371-
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP18]]
372-
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP19]]
373-
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP20]]
338+
; CHECK-NEXT: [[SHR_0:%.*]] = lshr i32 [[X]], 1
339+
; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[X]], -171254
340+
; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[MUL_0]], 1
341+
; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[SHR_0]], [[SHR_1]]
374342
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP21]]
375-
; CHECK-NEXT: store i16 0, ptr [[TMP23]], align 2
376-
; CHECK-NEXT: store i16 0, ptr [[TMP24]], align 2
377-
; CHECK-NEXT: store i16 0, ptr [[TMP25]], align 2
378-
; CHECK-NEXT: store i16 0, ptr [[TMP26]], align 2
379-
; CHECK-NEXT: store i16 0, ptr [[TMP27]], align 2
380-
; CHECK-NEXT: store i16 0, ptr [[TMP28]], align 2
381-
; CHECK-NEXT: store i16 0, ptr [[TMP29]], align 2
382343
; CHECK-NEXT: store i16 0, ptr [[TMP30]], align 2
383-
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[TMP23]], i64 14
384-
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[TMP24]], i64 14
385-
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[TMP25]], i64 14
386-
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP26]], i64 14
387-
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[TMP27]], i64 14
388-
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[TMP28]], i64 14
389-
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP29]], i64 14
390-
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[TMP30]], i64 14
391-
; CHECK-NEXT: store i16 0, ptr [[TMP31]], align 2
392-
; CHECK-NEXT: store i16 0, ptr [[TMP32]], align 2
393-
; CHECK-NEXT: store i16 0, ptr [[TMP33]], align 2
394-
; CHECK-NEXT: store i16 0, ptr [[TMP34]], align 2
395-
; CHECK-NEXT: store i16 0, ptr [[TMP35]], align 2
396-
; CHECK-NEXT: store i16 0, ptr [[TMP36]], align 2
397-
; CHECK-NEXT: store i16 0, ptr [[TMP37]], align 2
344+
; CHECK-NEXT: [[GEP_0_1:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP21]]
345+
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[GEP_0_1]], i64 14
398346
; CHECK-NEXT: store i16 0, ptr [[TMP38]], align 2
399-
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[TMP23]], i64 2
400-
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[TMP24]], i64 2
401-
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[TMP25]], i64 2
402-
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[TMP26]], i64 2
403-
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP27]], i64 2
404-
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[TMP28]], i64 2
405-
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[TMP29]], i64 2
347+
; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], 1
348+
; CHECK-NEXT: [[SHR_2:%.*]] = lshr i32 [[ADD_1]], 1
349+
; CHECK-NEXT: [[TMP54:%.*]] = trunc i32 [[SHR_2]] to i16
406350
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[TMP30]], i64 2
407-
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
408-
; CHECK-NEXT: store i16 [[TMP47]], ptr [[TMP39]], align 2
409-
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <8 x i16> [[TMP6]], i32 1
410-
; CHECK-NEXT: store i16 [[TMP48]], ptr [[TMP40]], align 2
411-
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <8 x i16> [[TMP6]], i32 2
412-
; CHECK-NEXT: store i16 [[TMP49]], ptr [[TMP41]], align 2
413-
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <8 x i16> [[TMP6]], i32 3
414-
; CHECK-NEXT: store i16 [[TMP50]], ptr [[TMP42]], align 2
415-
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <8 x i16> [[TMP6]], i32 4
416-
; CHECK-NEXT: store i16 [[TMP51]], ptr [[TMP43]], align 2
417-
; CHECK-NEXT: [[TMP52:%.*]] = extractelement <8 x i16> [[TMP6]], i32 5
418-
; CHECK-NEXT: store i16 [[TMP52]], ptr [[TMP44]], align 2
419-
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <8 x i16> [[TMP6]], i32 6
420-
; CHECK-NEXT: store i16 [[TMP53]], ptr [[TMP45]], align 2
421-
; CHECK-NEXT: [[TMP54:%.*]] = extractelement <8 x i16> [[TMP6]], i32 7
422351
; CHECK-NEXT: store i16 [[TMP54]], ptr [[TMP46]], align 2
423-
; CHECK-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[TMP23]], i64 12
424-
; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[TMP24]], i64 12
425-
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr [[TMP25]], i64 12
426-
; CHECK-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[TMP26]], i64 12
427-
; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr [[TMP27]], i64 12
428-
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr [[TMP28]], i64 12
429-
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr [[TMP29]], i64 12
352+
; CHECK-NEXT: [[SUB_0:%.*]] = sub i32 0, [[MUL_0]]
353+
; CHECK-NEXT: [[SHR_3:%.*]] = lshr i32 [[SUB_0]], 1
354+
; CHECK-NEXT: [[TMP70:%.*]] = trunc i32 [[SHR_3]] to i16
430355
; CHECK-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[TMP30]], i64 12
431-
; CHECK-NEXT: [[TMP63:%.*]] = extractelement <8 x i16> [[TMP9]], i32 0
432-
; CHECK-NEXT: store i16 [[TMP63]], ptr [[TMP55]], align 2
433-
; CHECK-NEXT: [[TMP64:%.*]] = extractelement <8 x i16> [[TMP9]], i32 1
434-
; CHECK-NEXT: store i16 [[TMP64]], ptr [[TMP56]], align 2
435-
; CHECK-NEXT: [[TMP65:%.*]] = extractelement <8 x i16> [[TMP9]], i32 2
436-
; CHECK-NEXT: store i16 [[TMP65]], ptr [[TMP57]], align 2
437-
; CHECK-NEXT: [[TMP66:%.*]] = extractelement <8 x i16> [[TMP9]], i32 3
438-
; CHECK-NEXT: store i16 [[TMP66]], ptr [[TMP58]], align 2
439-
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <8 x i16> [[TMP9]], i32 4
440-
; CHECK-NEXT: store i16 [[TMP67]], ptr [[TMP59]], align 2
441-
; CHECK-NEXT: [[TMP68:%.*]] = extractelement <8 x i16> [[TMP9]], i32 5
442-
; CHECK-NEXT: store i16 [[TMP68]], ptr [[TMP60]], align 2
443-
; CHECK-NEXT: [[TMP69:%.*]] = extractelement <8 x i16> [[TMP9]], i32 6
444-
; CHECK-NEXT: store i16 [[TMP69]], ptr [[TMP61]], align 2
445-
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <8 x i16> [[TMP9]], i32 7
446356
; CHECK-NEXT: store i16 [[TMP70]], ptr [[TMP62]], align 2
447-
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[TMP23]], i64 4
448-
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr [[TMP24]], i64 4
449-
; CHECK-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr [[TMP25]], i64 4
450-
; CHECK-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr [[TMP26]], i64 4
451-
; CHECK-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr [[TMP27]], i64 4
452-
; CHECK-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr [[TMP28]], i64 4
453-
; CHECK-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr [[TMP29]], i64 4
357+
; CHECK-NEXT: [[OR_0:%.*]] = or i32 [[X]], 1
358+
; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[OR_0]], 1
359+
; CHECK-NEXT: [[SHR_4:%.*]] = lshr i32 [[ADD_2]], 1
360+
; CHECK-NEXT: [[TMP86:%.*]] = trunc i32 [[SHR_4]] to i16
454361
; CHECK-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[TMP30]], i64 4
455-
; CHECK-NEXT: [[TMP79:%.*]] = extractelement <8 x i16> [[TMP13]], i32 0
456-
; CHECK-NEXT: store i16 [[TMP79]], ptr [[TMP71]], align 2
457-
; CHECK-NEXT: [[TMP80:%.*]] = extractelement <8 x i16> [[TMP13]], i32 1
458-
; CHECK-NEXT: store i16 [[TMP80]], ptr [[TMP72]], align 2
459-
; CHECK-NEXT: [[TMP81:%.*]] = extractelement <8 x i16> [[TMP13]], i32 2
460-
; CHECK-NEXT: store i16 [[TMP81]], ptr [[TMP73]], align 2
461-
; CHECK-NEXT: [[TMP82:%.*]] = extractelement <8 x i16> [[TMP13]], i32 3
462-
; CHECK-NEXT: store i16 [[TMP82]], ptr [[TMP74]], align 2
463-
; CHECK-NEXT: [[TMP83:%.*]] = extractelement <8 x i16> [[TMP13]], i32 4
464-
; CHECK-NEXT: store i16 [[TMP83]], ptr [[TMP75]], align 2
465-
; CHECK-NEXT: [[TMP84:%.*]] = extractelement <8 x i16> [[TMP13]], i32 5
466-
; CHECK-NEXT: store i16 [[TMP84]], ptr [[TMP76]], align 2
467-
; CHECK-NEXT: [[TMP85:%.*]] = extractelement <8 x i16> [[TMP13]], i32 6
468-
; CHECK-NEXT: store i16 [[TMP85]], ptr [[TMP77]], align 2
469-
; CHECK-NEXT: [[TMP86:%.*]] = extractelement <8 x i16> [[TMP13]], i32 7
470362
; CHECK-NEXT: store i16 [[TMP86]], ptr [[TMP78]], align 2
471-
; CHECK-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr [[TMP23]], i64 10
472-
; CHECK-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr [[TMP24]], i64 10
473-
; CHECK-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr [[TMP25]], i64 10
474-
; CHECK-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr [[TMP26]], i64 10
475-
; CHECK-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr [[TMP27]], i64 10
476-
; CHECK-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr [[TMP28]], i64 10
477-
; CHECK-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[TMP29]], i64 10
478-
; CHECK-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[TMP30]], i64 10
479-
; CHECK-NEXT: store i16 0, ptr [[TMP87]], align 2
480-
; CHECK-NEXT: store i16 0, ptr [[TMP88]], align 2
481-
; CHECK-NEXT: store i16 0, ptr [[TMP89]], align 2
482-
; CHECK-NEXT: store i16 0, ptr [[TMP90]], align 2
483-
; CHECK-NEXT: store i16 0, ptr [[TMP91]], align 2
484-
; CHECK-NEXT: store i16 0, ptr [[TMP92]], align 2
485-
; CHECK-NEXT: store i16 0, ptr [[TMP93]], align 2
363+
; CHECK-NEXT: [[GEP_0_2:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP21]]
364+
; CHECK-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[GEP_0_2]], i64 10
486365
; CHECK-NEXT: store i16 0, ptr [[TMP94]], align 2
487-
; CHECK-NEXT: [[TMP95:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT2]] to <8 x i16>
488-
; CHECK-NEXT: [[TMP96:%.*]] = or <8 x i16> [[TMP95]], splat (i16 1)
489-
; CHECK-NEXT: [[TMP97:%.*]] = add <8 x i16> [[TMP96]], splat (i16 1)
490-
; CHECK-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[TMP23]], i64 8
491-
; CHECK-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[TMP24]], i64 8
492-
; CHECK-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr [[TMP25]], i64 8
493-
; CHECK-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr [[TMP26]], i64 8
494-
; CHECK-NEXT: [[TMP102:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8
495-
; CHECK-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr [[TMP28]], i64 8
496-
; CHECK-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr [[TMP29]], i64 8
366+
; CHECK-NEXT: [[TRUNC_3:%.*]] = trunc i32 [[TMP22]] to i16
367+
; CHECK-NEXT: [[OR_1:%.*]] = or i16 [[TRUNC_3]], 1
368+
; CHECK-NEXT: [[TMP113:%.*]] = add i16 [[OR_1]], 1
497369
; CHECK-NEXT: [[TMP105:%.*]] = getelementptr i8, ptr [[TMP30]], i64 8
498-
; CHECK-NEXT: [[TMP106:%.*]] = extractelement <8 x i16> [[TMP97]], i32 0
499-
; CHECK-NEXT: store i16 [[TMP106]], ptr [[TMP98]], align 2
500-
; CHECK-NEXT: [[TMP107:%.*]] = extractelement <8 x i16> [[TMP97]], i32 1
501-
; CHECK-NEXT: store i16 [[TMP107]], ptr [[TMP99]], align 2
502-
; CHECK-NEXT: [[TMP108:%.*]] = extractelement <8 x i16> [[TMP97]], i32 2
503-
; CHECK-NEXT: store i16 [[TMP108]], ptr [[TMP100]], align 2
504-
; CHECK-NEXT: [[TMP109:%.*]] = extractelement <8 x i16> [[TMP97]], i32 3
505-
; CHECK-NEXT: store i16 [[TMP109]], ptr [[TMP101]], align 2
506-
; CHECK-NEXT: [[TMP110:%.*]] = extractelement <8 x i16> [[TMP97]], i32 4
507-
; CHECK-NEXT: store i16 [[TMP110]], ptr [[TMP102]], align 2
508-
; CHECK-NEXT: [[TMP111:%.*]] = extractelement <8 x i16> [[TMP97]], i32 5
509-
; CHECK-NEXT: store i16 [[TMP111]], ptr [[TMP103]], align 2
510-
; CHECK-NEXT: [[TMP112:%.*]] = extractelement <8 x i16> [[TMP97]], i32 6
511-
; CHECK-NEXT: store i16 [[TMP112]], ptr [[TMP104]], align 2
512-
; CHECK-NEXT: [[TMP113:%.*]] = extractelement <8 x i16> [[TMP97]], i32 7
513370
; CHECK-NEXT: store i16 [[TMP113]], ptr [[TMP105]], align 2
514-
; CHECK-NEXT: [[TMP114:%.*]] = getelementptr i8, ptr [[TMP23]], i64 6
515-
; CHECK-NEXT: [[TMP115:%.*]] = getelementptr i8, ptr [[TMP24]], i64 6
516-
; CHECK-NEXT: [[TMP116:%.*]] = getelementptr i8, ptr [[TMP25]], i64 6
517-
; CHECK-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr [[TMP26]], i64 6
518-
; CHECK-NEXT: [[TMP118:%.*]] = getelementptr i8, ptr [[TMP27]], i64 6
519-
; CHECK-NEXT: [[TMP119:%.*]] = getelementptr i8, ptr [[TMP28]], i64 6
520-
; CHECK-NEXT: [[TMP120:%.*]] = getelementptr i8, ptr [[TMP29]], i64 6
521371
; CHECK-NEXT: [[TMP121:%.*]] = getelementptr i8, ptr [[TMP30]], i64 6
522-
; CHECK-NEXT: store i16 0, ptr [[TMP114]], align 2
523-
; CHECK-NEXT: store i16 0, ptr [[TMP115]], align 2
524-
; CHECK-NEXT: store i16 0, ptr [[TMP116]], align 2
525-
; CHECK-NEXT: store i16 0, ptr [[TMP117]], align 2
526-
; CHECK-NEXT: store i16 0, ptr [[TMP118]], align 2
527-
; CHECK-NEXT: store i16 0, ptr [[TMP119]], align 2
528-
; CHECK-NEXT: store i16 0, ptr [[TMP120]], align 2
529372
; CHECK-NEXT: store i16 0, ptr [[TMP121]], align 2
530-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
531-
; CHECK-NEXT: [[TMP122:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
532-
; CHECK-NEXT: br i1 [[TMP122]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
533-
; CHECK: [[MIDDLE_BLOCK]]:
534-
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
535-
; CHECK: [[SCALAR_PH]]:
373+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[TMP21]], 8
374+
; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[TMP21]], 128
375+
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
376+
; CHECK: [[EXIT]]:
377+
; CHECK-NEXT: ret void
536378
;
537379
entry:
538380
%temp1 = alloca [64 x i32], align 4

0 commit comments

Comments
 (0)