|
| 1 | +# REQUIRES: asserts |
| 2 | +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ |
| 3 | +# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \ |
| 4 | +# RUN: -window-search-ratio=100 -window-search-num=100 -window-diff-limit=1 \ |
| 5 | +# RUN: | FileCheck %s |
| 6 | + |
| 7 | +# Verify that, at window offset 2, three of the loop's V6_vaddw instructions are scheduled in the same cycle. |
| 8 | +# CHECK-LABEL: Current window Offset is 2 |
| 9 | +# CHECK: Cycle [[CycleNum:[0-9]+]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr |
| 10 | +# CHECK: Cycle [[CycleNum]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr |
| 11 | +# CHECK: Cycle [[CycleNum]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr |
| 12 | +# CHECK-LABEL: Current window Offset is 3 |
| 13 | + |
| 14 | +--- | |
| 15 | + define void @add_parallel(i32 %N, ptr noalias %x, ptr noalias %y) { ; per iteration: load one <32 x i32> from %x, combine it with three splats via six vaddw calls, store the result to %y |
| 16 | + entry: |
| 17 | + %isZeroLength = icmp eq i32 %N, 0 |
| 18 | + br i1 %isZeroLength, label %loop.exit, label %loop.preheader ; skip the loop entirely when %N == 0 |
| 19 | + |
| 20 | + loop.preheader: ; preds = %entry |
| 21 | + %half_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1056964608) ; 1056964608 = 0x3F000000 |
| 22 | + %one_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1065353216) ; 1065353216 = 0x3F800000 |
| 23 | + %two_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1073741824) ; 1073741824 = 0x40000000 |
| 24 | + br label %loop.body |
| 25 | + |
| 26 | + loop.exit: ; preds = %loop.body, %entry |
| 27 | + ret void |
| 28 | + |
| 29 | + loop.body: ; preds = %loop.body, %loop.preheader |
| 30 | + %lsr.iv1 = phi ptr [ %cgep2, %loop.body ], [ %x, %loop.preheader ] ; running load pointer, starts at %x |
| 31 | + %lsr.iv = phi ptr [ %cgep1, %loop.body ], [ %y, %loop.preheader ] ; running store pointer, starts at %y |
| 32 | + %index = phi i32 [ 0, %loop.preheader ], [ %index.next, %loop.body ] |
| 33 | + %vec_x1 = load <32 x i32>, ptr %lsr.iv1, align 128 |
| 34 | + %vec_add_1 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %one_splat, <32 x i32> %vec_x1) ; adds 1..3 depend only on the load and a loop-invariant splat, so they are mutually independent |
| 35 | + %vec_add_2 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %half_splat, <32 x i32> %vec_x1) |
| 36 | + %vec_add_3 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %two_splat, <32 x i32> %vec_x1) |
| 37 | + %vec_add_4 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_2) ; adds 4 and 5 both consume %vec_add_1 |
| 38 | + %vec_add_5 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_3) |
| 39 | + %vec_add_6 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_5, <32 x i32> %vec_add_4) ; final add joins both chains; its result is the value stored |
| 40 | + store <32 x i32> %vec_add_6, ptr %lsr.iv, align 128 |
| 41 | + %index.next = add nuw i32 %index, 32 ; one <32 x i32> vector (32 elements) consumed per iteration |
| 42 | + %continue = icmp ult i32 %index.next, %N |
| 43 | + %cgep1 = getelementptr i8, ptr %lsr.iv, i32 128 ; both pointers advance by 128 bytes, i.e. one <32 x i32> vector |
| 44 | + %cgep2 = getelementptr i8, ptr %lsr.iv1, i32 128 |
| 45 | + br i1 %continue, label %loop.body, label %loop.exit |
| 46 | + } |
| 47 | + |
| 48 | + declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32) |
| 49 | + declare <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32>, <32 x i32>) |
| 50 | +... |
| 51 | +--- |
| 52 | +name: add_parallel # machine-level form of @add_parallel; bb.3 is the self-looping block that ends in ENDLOOP0 |
| 53 | +tracksRegLiveness: true |
| 54 | +body: | |
| 55 | + bb.0.entry: |
| 56 | + successors: %bb.2(0x30000000), %bb.1(0x50000000) |
| 57 | + liveins: $r0, $r1, $r2 |
| 58 | +
|
| 59 | + %0:intregs = COPY $r2 ; initial store pointer (feeds PHI %15, used by the store into %ir.lsr.iv) |
| 60 | + %1:intregs = COPY $r1 ; initial load pointer (feeds PHI %13, used by the load from %ir.lsr.iv1) |
| 61 | + %2:intregs = COPY $r0 ; element count: compared against 0 and used to derive the trip count |
| 62 | + %3:predregs = C2_cmpeqi %2, 0 |
| 63 | + J2_jumpt killed %3, %bb.2, implicit-def dead $pc ; count == 0: go straight to the exit block |
| 64 | + J2_jump %bb.1, implicit-def dead $pc |
| 65 | +
|
| 66 | + bb.1.loop.preheader: |
| 67 | + successors: %bb.3(0x80000000) |
| 68 | +
|
| 69 | + %4:intregs = A2_tfrsi 1056964608 |
| 70 | + %5:hvxvr = V6_lvsplatw killed %4 ; loop-invariant splat of 1056964608 |
| 71 | + %6:intregs = A2_tfrsi 1065353216 |
| 72 | + %7:hvxvr = V6_lvsplatw killed %6 ; loop-invariant splat of 1065353216 |
| 73 | + %8:intregs = A2_tfrsi 1073741824 |
| 74 | + %9:hvxvr = V6_lvsplatw killed %8 ; loop-invariant splat of 1073741824 |
| 75 | + %10:intregs = A2_addi %2, 31 ; trip count = (count + 31) >> 5, i.e. count / 32 rounded up |
| 76 | + %11:intregs = S2_lsr_i_r %10, 5 |
| 77 | + %12:intregs = COPY %11 |
| 78 | + J2_loop0r %bb.3, %12, implicit-def $lc0, implicit-def $sa0, implicit-def $usr ; loop0 setup targeting bb.3 with count %12 (defines $lc0/$sa0) |
| 79 | + J2_jump %bb.3, implicit-def dead $pc |
| 80 | +
|
| 81 | + bb.2.loop.exit: |
| 82 | + PS_jmpret $r31, implicit-def dead $pc |
| 83 | +
|
| 84 | + bb.3.loop.body (machine-block-address-taken): |
| 85 | + successors: %bb.3(0x7c000000), %bb.2(0x04000000) |
| 86 | +
|
| 87 | + %13:intregs = PHI %1, %bb.1, %14, %bb.3 ; load pointer, updated by the load below |
| 88 | + %15:intregs = PHI %0, %bb.1, %16, %bb.3 ; store pointer, updated by the store below |
| 89 | + %17:hvxvr, %14:intregs = V6_vL32b_pi %13, 128 :: (load (s1024) from %ir.lsr.iv1) ; also defines %14, the next load pointer (immediate 128) |
| 90 | + %18:hvxvr = V6_vaddw %7, %17 ; %18..%20 read only the loaded vector and a preheader splat, so they do not depend on each other |
| 91 | + %19:hvxvr = V6_vaddw %5, %17 |
| 92 | + %20:hvxvr = V6_vaddw %9, %17 |
| 93 | + %21:hvxvr = V6_vaddw %18, killed %19 ; %21 and %22 both consume %18 |
| 94 | + %22:hvxvr = V6_vaddw %18, killed %20 |
| 95 | + %23:hvxvr = V6_vaddw killed %22, killed %21 ; joins both chains; this is the value stored |
| 96 | + %16:intregs = V6_vS32b_pi %15, 128, killed %23 :: (store (s1024) into %ir.lsr.iv) ; also defines %16, the next store pointer (immediate 128) |
| 97 | + ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 ; loop back-edge to bb.3; reads $sa0/$lc0 set up by J2_loop0r |
| 98 | + J2_jump %bb.2, implicit-def dead $pc |
| 99 | +
|
| 100 | +... |
0 commit comments