@@ -229,15 +229,15 @@ func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
229229// final `vector.transfer_write` should be preserved as:
230230// vector.transfer_write %2, %subview
231231
232- // CHECK-LABEL: func.func @collapse_shape
232+ // CHECK-LABEL: func.func @collapse_shape_and_read_from_source
233233// CHECK: scf.for {{.*}} {
234234// CHECK: vector.transfer_read
235235// CHECK: vector.transfer_write
236236// CHECK: vector.transfer_write
237237// CHECK: vector.transfer_read
238238// CHECK: vector.transfer_write
239239
240- func.func @collapse_shape (%in_0: memref <1 x20 x1 xi32 >, %vec: vector <4 xi32 >) {
240+ func.func @collapse_shape_and_read_from_source (%in_0: memref <1 x20 x1 xi32 >, %vec: vector <4 xi32 >) {
241241 %c0_i32 = arith.constant 0 : i32
242242 %c0 = arith.constant 0 : index
243243 %c4 = arith.constant 4 : index
@@ -257,6 +257,98 @@ func.func @collapse_shape(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) {
257257 return
258258}
259259
260+ // The same regression test as @collapse_shape_and_read_from_source, but using memref.expand_shape.
261+
262+ // CHECK-LABEL: func.func @expand_shape_and_read_from_source
263+ // CHECK: scf.for {{.*}} {
264+ // CHECK: vector.transfer_read
265+ // CHECK: vector.transfer_write
266+ // CHECK: vector.transfer_write
267+ // CHECK: vector.transfer_read
268+ // CHECK: vector.transfer_write
269+
// Test input: each loop iteration writes %1 to %alloca, then writes %vec
// through %expand_shape (a reshape of %alloca), and finally reads %alloca back
// into %2. The CHECK lines for this function expect both transfer reads and
// all three transfer writes to remain after the pass runs.
270+ func.func @expand_shape_and_read_from_source (%in_0: memref <20 xi32 >, %vec: vector <1 x4 x1 xi32 >) {
271+ %c0_i32 = arith.constant 0 : i32
272+ %c0 = arith.constant 0 : index
273+ %c4 = arith.constant 4 : index
274+ %c20 = arith.constant 20 : index
275+ 
276+ %alloca = memref.alloca () {alignment = 64 : i64 } : memref <4 xi32 >
277+ %expand_shape = memref.expand_shape %alloca [[0 , 1 , 2 ]] output_shape [1 , 4 , 1 ] : memref <4 xi32 > into memref <1 x4 x1 xi32 >
278+ scf.for %arg0 = %c0 to %c20 step %c4 {
279+ %subview = memref.subview %in_0 [%arg0 ] [4 ] [1 ] : memref <20 xi32 > to memref <4 xi32 , strided <[1 ], offset : ?>>
280+ %1 = vector.transfer_read %subview [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <4 xi32 , strided <[1 ], offset : ?>>, vector <4 xi32 >
281+ // %alloca and %expand_shape alias, so the two writes below target the same memory.
282+ vector.transfer_write %1 , %alloca [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <4 xi32 >
283+ vector.transfer_write %vec , %expand_shape [%c0 , %c0 , %c0 ] {in_bounds = [true , true , true ]} : vector <1 x4 x1 xi32 >, memref <1 x4 x1 xi32 >
284+ %2 = vector.transfer_read %alloca [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <4 xi32 >, vector <4 xi32 >
285+ vector.transfer_write %2 , %subview [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <4 xi32 , strided <[1 ], offset : ?>>
286+ }
287+ return
288+ }
289+
290+ // The same regression test, but the initial write is to the collapsed memref,
291+ // and the subsequent unforwardable read is from the collapse shape.
292+
293+ // CHECK-LABEL: func.func @collapse_shape_and_read_from_collapse
294+ // CHECK: scf.for {{.*}} {
295+ // CHECK: vector.transfer_read
296+ // CHECK: vector.transfer_write
297+ // CHECK: vector.transfer_write
298+ // CHECK: vector.transfer_read
299+ // CHECK: vector.transfer_write
300+
// Test input: each loop iteration writes %1 through %collapse_shape (a reshape
// of %alloca), then writes %vec directly to %alloca, and finally reads
// %collapse_shape back into %2. The CHECK lines for this function expect both
// transfer reads and all three transfer writes to remain after the pass runs.
301+ func.func @collapse_shape_and_read_from_collapse (%in_0: memref <20 xi32 >, %vec: vector <1 x4 x1 xi32 >) {
302+ %c0_i32 = arith.constant 0 : i32
303+ %c0 = arith.constant 0 : index
304+ %c4 = arith.constant 4 : index
305+ %c20 = arith.constant 20 : index
306+ 
307+ %alloca = memref.alloca () {alignment = 64 : i64 } : memref <1 x4 x1 xi32 >
308+ %collapse_shape = memref.collapse_shape %alloca [[0 , 1 , 2 ]] : memref <1 x4 x1 xi32 > into memref <4 xi32 >
309+ scf.for %arg0 = %c0 to %c20 step %c4 {
310+ %subview = memref.subview %in_0 [%arg0 ] [4 ] [1 ] : memref <20 xi32 > to memref <4 xi32 , strided <[1 ], offset : ?>>
311+ %1 = vector.transfer_read %subview [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <4 xi32 , strided <[1 ], offset : ?>>, vector <4 xi32 >
312+ vector.transfer_write %1 , %collapse_shape [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <4 xi32 >
313+ // %alloca and %collapse_shape alias, so the write below targets the same memory as the write above.
314+ vector.transfer_write %vec , %alloca [%c0 , %c0 , %c0 ] {in_bounds = [true , true , true ]} : vector <1 x4 x1 xi32 >, memref <1 x4 x1 xi32 >
315+ %2 = vector.transfer_read %collapse_shape [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <4 xi32 >, vector <4 xi32 >
316+ vector.transfer_write %2 , %subview [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <4 xi32 , strided <[1 ], offset : ?>>
317+ }
318+ return
319+ }
320+
321+ // The same test, but the initial write is to the expanded memref, and the
322+ // subsequent unforwardable read is from the expand shape (cf. the previous collapse test).
323+
324+ // CHECK-LABEL: func.func @expand_shape_and_read_from_expand
325+ // CHECK: scf.for {{.*}} {
326+ // CHECK: vector.transfer_read
327+ // CHECK: vector.transfer_write
328+ // CHECK: vector.transfer_write
329+ // CHECK: vector.transfer_read
330+ // CHECK: vector.transfer_write
331+
// Test input: each loop iteration writes %1 through %expand_shape (a reshape
// of %alloca), then writes %vec directly to %alloca, and finally reads
// %expand_shape back into %2. The CHECK lines for this function expect both
// transfer reads and all three transfer writes to remain after the pass runs.
332+ func.func @expand_shape_and_read_from_expand (%in_0: memref <1 x20 x1 xi32 >, %vec: vector <4 xi32 >) {
333+ %c0_i32 = arith.constant 0 : i32
334+ %c0 = arith.constant 0 : index
335+ %c4 = arith.constant 4 : index
336+ %c20 = arith.constant 20 : index
337+ 
338+ %alloca = memref.alloca () {alignment = 64 : i64 } : memref <4 xi32 >
339+ %expand_shape = memref.expand_shape %alloca [[0 , 1 , 2 ]] output_shape [1 , 4 , 1 ] : memref <4 xi32 > into memref <1 x4 x1 xi32 >
340+ scf.for %arg0 = %c0 to %c20 step %c4 {
341+ %subview = memref.subview %in_0 [0 , %arg0 , 0 ] [1 , 4 , 1 ] [1 , 1 , 1 ] : memref <1 x20 x1 xi32 > to memref <1 x4 x1 xi32 , strided <[20 , 1 , 1 ], offset : ?>>
342+ %1 = vector.transfer_read %subview [%c0 , %c0 , %c0 ], %c0_i32 {in_bounds = [true , true , true ]} : memref <1 x4 x1 xi32 , strided <[20 , 1 , 1 ], offset : ?>>, vector <1 x4 x1 xi32 >
343+ vector.transfer_write %1 , %expand_shape [%c0 , %c0 , %c0 ] {in_bounds = [true , true , true ]} : vector <1 x4 x1 xi32 >, memref <1 x4 x1 xi32 >
344+ // %alloca and %expand_shape alias, so the write below targets the same memory as the write above.
345+ vector.transfer_write %vec , %alloca [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <4 xi32 >
346+ %2 = vector.transfer_read %expand_shape [%c0 , %c0 , %c0 ], %c0_i32 {in_bounds = [true , true , true ]} : memref <1 x4 x1 xi32 >, vector <1 x4 x1 xi32 >
347+ vector.transfer_write %2 , %subview [%c0 , %c0 , %c0 ] {in_bounds = [true , true , true ]} : vector <1 x4 x1 xi32 >, memref <1 x4 x1 xi32 , strided <[20 , 1 , 1 ], offset : ?>>
348+ }
349+ return
350+ }
351+
260352// CHECK-LABEL: func @forward_dead_store_dynamic_same_index
261353// CHECK-NOT: vector.transfer_write
262354// CHECK-NOT: vector.transfer_read
0 commit comments