@@ -873,3 +873,39 @@ func.func @lower_to_loops_with_rank_reducing_subviews(
873873// CHECKPARALLEL: %[[VAL:.+]] = memref.load %{{.+}}[%[[IV]]]
874874// CHECKPARALLEL: memref.store %[[VAL]], %{{.+}}[%[[IV]]]
875875// CHECKPARALLEL: }
876+
877+ // -----
878+
879+ func.func @transpose (%input: memref <?xf32 >, // Test input: a rank-1 linalg.transpose that reads %input and writes %init.
880+ %init: memref <?xf32 >) { // Both operands are dynamically sized 1-D f32 memrefs.
881+ linalg.transpose ins (%input:memref <?xf32 >)
882+ outs (%init:memref <?xf32 >)
883+ permutation = [0 ] // With a single dimension, [0] is the only possible permutation (identity).
884+ return
885+ } // The expectations that follow match one loop over dim 0 of %input that loads each element and stores it to %init at the same index.
886+ // CHECK-LABEL: func.func @transpose(
887+ // CHECK-SAME: %[[VAL_0:.*]]: memref<?xf32>,
888+ // CHECK-SAME: %[[VAL_1:.*]]: memref<?xf32>) {
889+ // CHECK: %[[VAL_2:.*]] = arith.constant 1 : index
890+ // CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
891+ // CHECK: %[[VAL_4:.*]] = memref.dim %[[VAL_0]], %[[VAL_3]] : memref<?xf32>
892+ // CHECK: scf.for %[[VAL_5:.*]] = %[[VAL_3]] to %[[VAL_4]] step %[[VAL_2]] {
893+ // CHECK: %[[VAL_6:.*]] = memref.load %[[VAL_0]]{{\[}}%[[VAL_5]]] : memref<?xf32>
894+ // CHECK: memref.store %[[VAL_6]], %[[VAL_1]]{{\[}}%[[VAL_5]]] : memref<?xf32>
895+ // CHECK: }
896+ // CHECK: return
897+ // CHECK: }
898+
899+ // CHECKPARALLEL-LABEL: func.func @transpose(
900+ // CHECKPARALLEL-SAME: %[[VAL_0:.*]]: memref<?xf32>,
901+ // CHECKPARALLEL-SAME: %[[VAL_1:.*]]: memref<?xf32>) {
902+ // CHECKPARALLEL: %[[VAL_2:.*]] = arith.constant 1 : index
903+ // CHECKPARALLEL: %[[VAL_3:.*]] = arith.constant 0 : index
904+ // CHECKPARALLEL: %[[VAL_4:.*]] = memref.dim %[[VAL_0]], %[[VAL_3]] : memref<?xf32>
905+ // CHECKPARALLEL: scf.parallel (%[[VAL_5:.*]]) = (%[[VAL_3]]) to (%[[VAL_4]]) step (%[[VAL_2]]) {
906+ // CHECKPARALLEL: %[[VAL_6:.*]] = memref.load %[[VAL_0]]{{\[}}%[[VAL_5]]] : memref<?xf32>
907+ // CHECKPARALLEL: memref.store %[[VAL_6]], %[[VAL_1]]{{\[}}%[[VAL_5]]] : memref<?xf32>
908+ // CHECKPARALLEL: scf.reduce
909+ // CHECKPARALLEL: }
910+ // CHECKPARALLEL: return
911+ // CHECKPARALLEL: }
0 commit comments