|  | 
|  | 1 | +# REQUIRES: asserts | 
|  | 2 | +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ | 
|  | 3 | +# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \ | 
|  | 4 | +# RUN: -window-search-ratio=100 -window-search-num=100 -window-diff-limit=1 \ | 
|  | 5 | +# RUN: | FileCheck %s | 
|  | 6 | + | 
|  | 7 | +# Of the six V6_vaddw instructions in the loop, verify that three are scheduled in the same cycle at window offset 2. | 
|  | 8 | +# CHECK-LABEL: Current window Offset is 2 | 
|  | 9 | +# CHECK: Cycle [[CycleNum:[0-9]+]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr | 
|  | 10 | +# CHECK: Cycle [[CycleNum]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr | 
|  | 11 | +# CHECK: Cycle [[CycleNum]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr | 
|  | 12 | +# CHECK-LABEL: Current window Offset is 3 | 
|  | 13 | + | 
|  | 14 | +--- | | 
|  | 15 | +  define void @add_parallel(i32 %N, ptr noalias %x, ptr noalias %y) { ; per iteration: load one vector from %x, run six vaddw calls, store the result to %y | 
|  | 16 | +  entry: | 
|  | 17 | +    %isZeroLength = icmp eq i32 %N, 0 ; skip the loop entirely when %N is 0 | 
|  | 18 | +    br i1 %isZeroLength, label %loop.exit, label %loop.preheader | 
|  | 19 | + | 
|  | 20 | +  loop.preheader:                                   ; preds = %entry | 
|  | 21 | +    %half_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1056964608) ; 1056964608 = 0x3F000000 (IEEE-754 single bit pattern of 0.5) | 
|  | 22 | +    %one_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1065353216) ; 1065353216 = 0x3F800000 (bit pattern of 1.0) | 
|  | 23 | +    %two_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1073741824) ; 1073741824 = 0x40000000 (bit pattern of 2.0) | 
|  | 24 | +    br label %loop.body | 
|  | 25 | + | 
|  | 26 | +  loop.exit:                                        ; preds = %loop.body, %entry | 
|  | 27 | +    ret void | 
|  | 28 | + | 
|  | 29 | +  loop.body:                                        ; preds = %loop.body, %loop.preheader | 
|  | 30 | +    %lsr.iv1 = phi ptr [ %cgep2, %loop.body ], [ %x, %loop.preheader ] | 
|  | 31 | +    %lsr.iv = phi ptr [ %cgep1, %loop.body ], [ %y, %loop.preheader ] | 
|  | 32 | +    %index = phi i32 [ 0, %loop.preheader ], [ %index.next, %loop.body ] | 
|  | 33 | +    %vec_x1 = load <32 x i32>, ptr %lsr.iv1, align 128 ; <32 x i32> is 128 bytes, matching the 128-byte pointer step below | 
|  | 34 | +    %vec_add_1 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %one_splat, <32 x i32> %vec_x1) ; vec_add_1..3 each depend only on %vec_x1 and a loop-invariant splat | 
|  | 35 | +    %vec_add_2 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %half_splat, <32 x i32> %vec_x1) | 
|  | 36 | +    %vec_add_3 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %two_splat, <32 x i32> %vec_x1) | 
|  | 37 | +    %vec_add_4 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_2) ; vec_add_4..6 combine the three results above into the stored value | 
|  | 38 | +    %vec_add_5 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_3) | 
|  | 39 | +    %vec_add_6 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_5, <32 x i32> %vec_add_4) | 
|  | 40 | +    store <32 x i32> %vec_add_6, ptr %lsr.iv, align 128 | 
|  | 41 | +    %index.next = add nuw i32 %index, 32 ; 32 i32 elements are consumed per iteration | 
|  | 42 | +    %continue = icmp ult i32 %index.next, %N | 
|  | 43 | +    %cgep1 = getelementptr i8, ptr %lsr.iv, i32 128 ; advance both pointers by one 128-byte vector | 
|  | 44 | +    %cgep2 = getelementptr i8, ptr %lsr.iv1, i32 128 | 
|  | 45 | +    br i1 %continue, label %loop.body, label %loop.exit | 
|  | 46 | +  } | 
|  | 47 | + | 
|  | 48 | +  declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32) | 
|  | 49 | +  declare <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32>, <32 x i32>) | 
|  | 50 | +... | 
|  | 51 | +--- | 
|  | 52 | +name:            add_parallel # machine-level body of @add_parallel; this is the input to -run-pass=pipeliner | 
|  | 53 | +tracksRegLiveness: true | 
|  | 54 | +body:             | | 
|  | 55 | +  bb.0.entry: | 
|  | 56 | +    successors: %bb.2(0x30000000), %bb.1(0x50000000) | 
|  | 57 | +    liveins: $r0, $r1, $r2 | 
|  | 58 | +
 | 
|  | 59 | +    %0:intregs = COPY $r2 ; %0 seeds the store-pointer PHI (%15), i.e. the IR's %y | 
|  | 60 | +    %1:intregs = COPY $r1 ; %1 seeds the load-pointer PHI (%13), i.e. the IR's %x | 
|  | 61 | +    %2:intregs = COPY $r0 ; %2 is compared against 0 below, i.e. the IR's %N | 
|  | 62 | +    %3:predregs = C2_cmpeqi %2, 0 | 
|  | 63 | +    J2_jumpt killed %3, %bb.2, implicit-def dead $pc ; N == 0: go straight to the exit block | 
|  | 64 | +    J2_jump %bb.1, implicit-def dead $pc | 
|  | 65 | +
 | 
|  | 66 | +  bb.1.loop.preheader: | 
|  | 67 | +    successors: %bb.3(0x80000000) | 
|  | 68 | +
 | 
|  | 69 | +    %4:intregs = A2_tfrsi 1056964608 ; 0x3F000000 | 
|  | 70 | +    %5:hvxvr = V6_lvsplatw killed %4 ; %5 corresponds to the IR's %half_splat | 
|  | 71 | +    %6:intregs = A2_tfrsi 1065353216 ; 0x3F800000 | 
|  | 72 | +    %7:hvxvr = V6_lvsplatw killed %6 ; %7 corresponds to the IR's %one_splat | 
|  | 73 | +    %8:intregs = A2_tfrsi 1073741824 ; 0x40000000 | 
|  | 74 | +    %9:hvxvr = V6_lvsplatw killed %8 ; %9 corresponds to the IR's %two_splat | 
|  | 75 | +    %10:intregs = A2_addi %2, 31 | 
|  | 76 | +    %11:intregs = S2_lsr_i_r %10, 5 ; %11 = (N + 31) >> 5: N rounded up to whole 32-element vectors | 
|  | 77 | +    %12:intregs = COPY %11 | 
|  | 78 | +    J2_loop0r %bb.3, %12, implicit-def $lc0, implicit-def $sa0, implicit-def $usr ; sets up loop 0 ($lc0/$sa0) targeting %bb.3 with %12 as the count operand | 
|  | 79 | +    J2_jump %bb.3, implicit-def dead $pc | 
|  | 80 | +
 | 
|  | 81 | +  bb.2.loop.exit: | 
|  | 82 | +    PS_jmpret $r31, implicit-def dead $pc | 
|  | 83 | +
 | 
|  | 84 | +  bb.3.loop.body (machine-block-address-taken): | 
|  | 85 | +    successors: %bb.3(0x7c000000), %bb.2(0x04000000) | 
|  | 86 | +
 | 
|  | 87 | +    %13:intregs = PHI %1, %bb.1, %14, %bb.3 | 
|  | 88 | +    %15:intregs = PHI %0, %bb.1, %16, %bb.3 | 
|  | 89 | +    %17:hvxvr, %14:intregs = V6_vL32b_pi %13, 128 :: (load (s1024) from %ir.lsr.iv1) ; loads one vector via %13; %14 is the updated pointer fed back through the PHI | 
|  | 90 | +    %18:hvxvr = V6_vaddw %7, %17 ; %18, %19, %20 each depend only on the load (%17) and a loop-invariant splat | 
|  | 91 | +    %19:hvxvr = V6_vaddw %5, %17 | 
|  | 92 | +    %20:hvxvr = V6_vaddw %9, %17 | 
|  | 93 | +    %21:hvxvr = V6_vaddw %18, killed %19 ; %21, %22, %23 form the dependent chain that produces the stored value | 
|  | 94 | +    %22:hvxvr = V6_vaddw %18, killed %20 | 
|  | 95 | +    %23:hvxvr = V6_vaddw killed %22, killed %21 | 
|  | 96 | +    %16:intregs = V6_vS32b_pi %15, 128, killed %23 :: (store (s1024) into %ir.lsr.iv) ; stores %23 via %15; %16 is the updated pointer fed back through the PHI | 
|  | 97 | +    ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 ; loop back-edge to %bb.3, driven by $lc0/$sa0 | 
|  | 98 | +    J2_jump %bb.2, implicit-def dead $pc | 
|  | 99 | +
 | 
|  | 100 | +... | 
0 commit comments