Skip to content

Commit 079dca0

Browse files
bondhugula authored and tensorflower-gardener committed
unroll and jam: fix order of jammed bodies
- Bodies would earlier appear in the order (i, i+3, i+2, i+1) instead of (i, i+1, i+2, i+3), for example, for factor 4.
- Clean up hardcoded test cases.

Signed-off-by: Uday Bondhugula <uday@polymagelabs.com>

Closes #170

COPYBARA_INTEGRATE_REVIEW=tensorflow/mlir#170 from bondhugula:ujam b66b405b2b1894a03b376952e32a9d0292042665
PiperOrigin-RevId: 273613131
1 parent 251293d commit 079dca0

File tree

2 files changed

+36
-26
lines changed

2 files changed

+36
-26
lines changed

lib/Transforms/LoopUnrollAndJam.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,8 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
209209
forOp.setStep(step * unrollJamFactor);
210210

211211
auto *forOpIV = forOp.getInductionVar();
212-
// Unroll and jam (appends unrollJamFactor-1 additional copies).
213-
for (unsigned i = 1; i < unrollJamFactor; i++) {
212+
// Unroll and jam (appends unrollJamFactor - 1 additional copies).
213+
for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
214214
// Operand map persists across all sub-blocks.
215215
BlockAndValueMapping operandMapping;
216216
for (auto &subBlock : subBlocks) {

test/Transforms/unroll-jam.mlir

Lines changed: 34 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
// RUN: mlir-opt %s -affine-loop-unroll-jam -unroll-jam-factor=2 | FileCheck %s
2+
// RUN: mlir-opt %s -affine-loop-unroll-jam -unroll-jam-factor=4 | FileCheck --check-prefix=UJAM-FOUR %s
23

34
// CHECK-DAG: [[MAP_PLUS_1:#map[0-9]+]] = (d0) -> (d0 + 1)
4-
// CHECK-DAG: [[M1:#map[0-9]+]] = ()[s0] -> (s0 + 8)
55
// CHECK-DAG: [[MAP_DIV_OFFSET:#map[0-9]+]] = ()[s0] -> (((s0 - 1) floordiv 2) * 2 + 1)
66
// CHECK-DAG: [[MAP_MULTI_RES:#map[0-9]+]] = ()[s0, s1] -> ((s0 floordiv 2) * 2, (s1 floordiv 2) * 2, 1024)
7+
// CHECK-DAG: [[MAP_SYM_UB:#map[0-9]+]] = ()[s0, s1] -> (s0, s1, 1024)
8+
9+
// UJAM-FOUR-DAG: [[UBMAP:#map[0-9]+]] = ()[s0] -> (s0 + 8)
10+
// UJAM-FOUR-DAG: [[MAP_PLUS_1:#map[0-9]+]] = (d0) -> (d0 + 1)
11+
// UJAM-FOUR-DAG: [[MAP_PLUS_2:#map[0-9]+]] = (d0) -> (d0 + 2)
12+
// UJAM-FOUR-DAG: [[MAP_PLUS_3:#map[0-9]+]] = (d0) -> (d0 + 3)
713

814
// CHECK-LABEL: func @unroll_jam_imperfect_nest() {
915
func @unroll_jam_imperfect_nest() {
@@ -29,33 +35,33 @@ func @unroll_jam_imperfect_nest() {
2935
// CHECK-NEXT: "addi32"([[RES4]], [[RES4]]) : (i32, i32) -> i32
3036
// CHECK-NEXT: }
3137
// CHECK: "foo"([[IV0]], [[RES1]])
32-
// CHECK-NEXT: {{.*}} = affine.apply [[MAP_PLUS_1]]([[IV0]])
38+
// CHECK-NEXT: affine.apply [[MAP_PLUS_1]]([[IV0]])
3339
// CHECK-NEXT: "foo"({{.*}}, [[RES2]])
3440
// CHECK: }
3541
// Cleanup loop (single iteration).
36-
// CHECK: %{{.*}} = "addi32"(%c100, %c100)
42+
// CHECK: "addi32"(%c100, %c100)
3743
// CHECK-NEXT: affine.for [[IV0]] = 0 to 17 {
3844
// CHECK-NEXT: [[RESC:%[0-9]+]] = "addi32"(%c100, %c100)
3945
// CHECK-NEXT: "addi32"([[RESC]], [[RESC]]) : (i32, i32) -> i32
4046
// CHECK-NEXT: }
41-
// CHECK-NEXT: %{{.*}} = "foo"(%c100, %{{.*}})
47+
// CHECK-NEXT: "foo"(%c100, %{{.*}})
4248
// CHECK-NEXT: return
4349

4450
// CHECK-LABEL: func @loop_nest_unknown_count_1
4551
// CHECK-SAME: [[N:arg[0-9]+]]: index
4652
func @loop_nest_unknown_count_1(%N : index) {
4753
// CHECK-NEXT: affine.for %{{.*}} = 1 to [[MAP_DIV_OFFSET]]()[%[[N]]] step 2 {
4854
// CHECK-NEXT: affine.for %{{.*}} = 1 to 100 {
49-
// CHECK-NEXT: %{{.*}} = "foo"() : () -> i32
50-
// CHECK-NEXT: %{{.*}} = "foo"() : () -> i32
55+
// CHECK-NEXT: "foo"() : () -> i32
56+
// CHECK-NEXT: "foo"() : () -> i32
5157
// CHECK-NEXT: }
5258
// CHECK-NEXT: }
5359
// A cleanup loop should be generated here.
5460
// CHECK-NEXT: affine.for %{{.*}} = [[MAP_DIV_OFFSET]]()[%[[N]]] to %[[N]] {
5561
// CHECK-NEXT: affine.for %{{.*}} = 1 to 100 {
5662
// CHECK-NEXT: "foo"() : () -> i32
57-
// CHECK_NEXT: }
58-
// CHECK_NEXT: }
63+
// CHECK-NEXT: }
64+
// CHECK-NEXT: }
5965
affine.for %i = 1 to %N {
6066
affine.for %j = 1 to 100 {
6167
%x = "foo"() : () -> i32
@@ -64,24 +70,28 @@ func @loop_nest_unknown_count_1(%N : index) {
6470
return
6571
}
6672

67-
// CHECK-LABEL: func @loop_nest_unknown_count_2
68-
// CHECK-SAME: %[[N:arg[0-9]+]]: index
73+
// UJAM-FOUR-LABEL: func @loop_nest_unknown_count_2
74+
// UJAM-FOUR-SAME: %[[N:arg[0-9]+]]: index
6975
func @loop_nest_unknown_count_2(%N : index) {
70-
// CHECK-NEXT: affine.for [[IV0:%arg[0-9]+]] = %[[N]] to [[M1]]()[%[[N]]] step 2 {
71-
// CHECK-NEXT: affine.for [[IV1:%arg[0-9]+]] = 1 to 100 {
72-
// CHECK-NEXT: "foo"([[IV0]]) : (index) -> i32
73-
// CHECK-NEXT: [[RES:%[0-9]+]] = affine.apply #map{{[0-9]+}}([[IV0]])
74-
// CHECK-NEXT: "foo"([[RES]])
75-
// CHECK-NEXT: }
76-
// CHECK-NEXT: }
76+
// UJAM-FOUR-NEXT: affine.for [[IV0:%arg[0-9]+]] = %[[N]] to [[UBMAP]]()[%[[N]]] step 4 {
77+
// UJAM-FOUR-NEXT: affine.for [[IV1:%arg[0-9]+]] = 1 to 100 {
78+
// UJAM-FOUR-NEXT: "foo"([[IV0]])
79+
// UJAM-FOUR-NEXT: [[IV_PLUS_1:%[0-9]+]] = affine.apply [[MAP_PLUS_1]]([[IV0]])
80+
// UJAM-FOUR-NEXT: "foo"([[IV_PLUS_1]])
81+
// UJAM-FOUR-NEXT: [[IV_PLUS_2:%[0-9]+]] = affine.apply [[MAP_PLUS_2]]([[IV0]])
82+
// UJAM-FOUR-NEXT: "foo"([[IV_PLUS_2]])
83+
// UJAM-FOUR-NEXT: [[IV_PLUS_3:%[0-9]+]] = affine.apply [[MAP_PLUS_3]]([[IV0]])
84+
// UJAM-FOUR-NEXT: "foo"([[IV_PLUS_3]])
85+
// UJAM-FOUR-NEXT: }
86+
// UJAM-FOUR-NEXT: }
7787
// The cleanup loop is a single iteration one and is promoted.
78-
// CHECK-NEXT: [[RES:%[0-9]+]] = affine.apply [[M1]]()[%[[N]]]
79-
// CHECK-NEXT: affine.for [[IV0]] = 1 to 100 {
80-
// CHECK-NEXT: "foo"([[RES]])
81-
// CHECK_NEXT: }
88+
// UJAM-FOUR-NEXT: [[RES:%[0-9]+]] = affine.apply [[UBMAP]]()[%[[N]]]
89+
// UJAM-FOUR-NEXT: affine.for [[IV0]] = 1 to 100 {
90+
// UJAM-FOUR-NEXT: "foo"([[RES]])
91+
// UJAM-FOUR-NEXT: }
8292
affine.for %i = %N to ()[s0] -> (s0+9) ()[%N] {
8393
affine.for %j = 1 to 100 {
84-
%x = "foo"(%i) : (index) -> i32
94+
"foo"(%i) : (index) -> ()
8595
}
8696
}
8797
return
@@ -102,11 +112,11 @@ func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index)
102112
// CHECK-NEXT: affine.for [[IV0:%arg[0-9]+]] = 0 to min [[MAP_MULTI_RES]]()[%[[M]], %[[N]]] step 2 {
103113
// CHECK-NEXT: affine.for [[IV1:%arg[0-9]+]] = 0 to %[[K]] {
104114
// CHECK-NEXT: "foo"([[IV0]], [[IV1]])
105-
// CHECK-NEXT: [[RES:%[0-9]+]] = affine.apply #map0([[IV0]])
115+
// CHECK-NEXT: [[RES:%[0-9]+]] = affine.apply [[MAP_PLUS_1]]([[IV0]])
106116
// CHECK-NEXT: "foo"([[RES]], [[IV1]])
107117
// CHECK-NEXT: }
108118
// CHECK-NEXT: }
109-
// CHECK-NEXT: affine.for [[IV0]] = max [[MAP_MULTI_RES]]()[%[[M]], %[[N]]] to min #map9()[%[[M]], %[[N]]] {
119+
// CHECK-NEXT: affine.for [[IV0]] = max [[MAP_MULTI_RES]]()[%[[M]], %[[N]]] to min [[MAP_SYM_UB]]()[%[[M]], %[[N]]] {
110120
// CHECK-NEXT: affine.for [[IV1]] = 0 to %[[K]] {
111121
// CHECK-NEXT: "foo"([[IV0]], [[IV1]])
112122
// CHECK-NEXT: }

0 commit comments

Comments
 (0)