From d8a46714aefa9d97c5d2d14a208cfa9f9a6d18ba Mon Sep 17 00:00:00 2001 From: linuxlonelyeagle Date: Mon, 25 Mar 2024 15:51:33 +0800 Subject: [PATCH 1/2] add lccl op side-effect. --- .../include/byteir/Dialect/Lccl/LcclOps.td | 31 +++++++++--------- compiler/test/Dialect/Ccl/ccl_bufferize.mlir | 32 +++++++++---------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/compiler/include/byteir/Dialect/Lccl/LcclOps.td b/compiler/include/byteir/Dialect/Lccl/LcclOps.td index 004b120a2..3219bda55 100644 --- a/compiler/include/byteir/Dialect/Lccl/LcclOps.td +++ b/compiler/include/byteir/Dialect/Lccl/LcclOps.td @@ -20,6 +20,7 @@ #define BYTEIR_DIALECT_LCCL_LCCL_OPS include "byteir/Dialect/Lccl/LcclBase.td" +include "mlir/Interfaces/SideEffectInterfaces.td" //===----------------------------------------------------------------------===// // Lccl Dialect operations. @@ -59,8 +60,8 @@ def Lccl_BroadcastOp : Lccl_ReplicaGroupsOp<"broadcast"> { }]; let arguments = (ins - AnyMemRef:$src, - Optional:$dynamic_replica_groups, + Arg:$src, + Arg, "", [MemRead]>:$dynamic_replica_groups, BoolAttr:$synchronous, OptionalAttr:$replica_groups, OptionalAttr:$unique_id @@ -76,8 +77,8 @@ def Lccl_SendOp : Lccl_Op<"send"> { }]; let arguments = (ins - AnyMemRef:$src, - Optional:$dynamic_target_index, + Arg:$src, + Arg, "", [MemRead]>:$dynamic_target_index, BoolAttr:$synchronous, OptionalAttr:$target_index ); @@ -92,8 +93,8 @@ def Lccl_RecvOp : Lccl_Op<"recv"> { }]; let arguments = (ins - AnyMemRef:$src, - Optional:$dynamic_source_index, + Arg:$src, + Arg, "", [MemRead]>:$dynamic_source_index, BoolAttr:$synchronous, OptionalAttr:$source_index ); @@ -115,9 +116,9 @@ def Lccl_AllReduceOp : Lccl_ReplicaGroupsOp<"all_reduce"> { }]; let arguments = (ins - AnyMemRef:$src, - AnyMemRef:$target, - Optional:$dynamic_replica_groups, + Arg:$src, + Arg:$target, + Arg, "", [MemRead]>:$dynamic_replica_groups, BoolAttr:$synchronous, StrAttr:$reduction, OptionalAttr:$replica_groups, @@ -139,9 +140,9 
@@ def Lccl_AllGatherOp : Lccl_ReplicaGroupsOp<"all_gather"> { }]; let arguments = (ins - AnyMemRef:$src, - AnyMemRef:$target, - Optional:$dynamic_replica_groups, + Arg:$src, + Arg:$target, + Arg, "", [MemRead]>:$dynamic_replica_groups, BoolAttr:$synchronous, I64Attr:$axis, OptionalAttr:$replica_groups, @@ -162,9 +163,9 @@ def Lccl_ReduceScatterOp : Lccl_ReplicaGroupsOp<"reduce_scatter"> { }]; let arguments = (ins - AnyMemRef:$src, - AnyMemRef:$target, - Optional:$dynamic_replica_groups, + Arg:$src, + Arg:$target, + Arg, "", [MemRead]>:$dynamic_replica_groups, BoolAttr:$synchronous, StrAttr:$reduction, I64Attr:$axis, diff --git a/compiler/test/Dialect/Ccl/ccl_bufferize.mlir b/compiler/test/Dialect/Ccl/ccl_bufferize.mlir index 1afd3c61b..f9c064ca2 100644 --- a/compiler/test/Dialect/Ccl/ccl_bufferize.mlir +++ b/compiler/test/Dialect/Ccl/ccl_bufferize.mlir @@ -1,7 +1,7 @@ // RUN: byteir-opt %s -byteir-one-shot-bufferize -split-input-file | FileCheck %s func.func @broadcast(%arg0: tensor<2x3x8xf32>) -> tensor<2x3x8xf32> { - %0 = "ccl.broadcast"(%arg0) {replica_groups = [[2, 3]], synchronous = true} : (tensor<2x3x8xf32>) -> tensor<2x3x8xf32> + %0 = ccl.broadcast %arg0 {replica_groups = [[2, 3]], synchronous = true} : (tensor<2x3x8xf32>) -> tensor<2x3x8xf32> return %0 : tensor<2x3x8xf32> } @@ -14,7 +14,7 @@ func.func @broadcast(%arg0: tensor<2x3x8xf32>) -> tensor<2x3x8xf32> { // ----- func.func @broadcast_dynamic(%arg0: tensor<2x3x8xf32>, %arg1: tensor<1x4xindex>) -> tensor<2x3x8xf32> { - %0 = "ccl.broadcast"(%arg0, %arg1) {synchronous = true} : (tensor<2x3x8xf32>, tensor<1x4xindex>) -> tensor<2x3x8xf32> + %0 = ccl.broadcast %arg0, %arg1 {synchronous = true} : (tensor<2x3x8xf32>, tensor<1x4xindex>) -> tensor<2x3x8xf32> return %0 : tensor<2x3x8xf32> } // CHECK-LABEL: func.func @broadcast_dynamic( @@ -27,7 +27,7 @@ func.func @broadcast_dynamic(%arg0: tensor<2x3x8xf32>, %arg1: tensor<1x4xindex>) // ----- func.func @send(%arg0: tensor<3xf32>) -> tensor<3xf32> { - %0 = 
"ccl.send"(%arg0){ synchronous = true, target_index = 0 : i64 }: (tensor<3xf32>) -> tensor<3xf32> + %0 = ccl.send %arg0 { synchronous = true, target_index = 0 : i64 }: (tensor<3xf32>) -> tensor<3xf32> return %0 : tensor<3xf32> } // CHECK-LABEL: func.func @send( @@ -40,7 +40,7 @@ func.func @send(%arg0: tensor<3xf32>) -> tensor<3xf32> { func.func @send_dynamic(%arg0: tensor<3xf32>) -> tensor<3xf32> { %target_index = arith.constant 0 : i64 - %0 = "ccl.send"(%arg0, %target_index) { synchronous = true } : (tensor<3xf32>, i64) -> tensor<3xf32> + %0 = ccl.send %arg0, %target_index { synchronous = true } : (tensor<3xf32>, i64) -> tensor<3xf32> return %0 : tensor<3xf32> } // CHECK-LABEL: func.func @send_dynamic( @@ -53,7 +53,7 @@ func.func @send_dynamic(%arg0: tensor<3xf32>) -> tensor<3xf32> { // ----- func.func @recv(%arg0: tensor<3xf32>) -> tensor<3xf32> { - %0 = "ccl.recv"(%arg0){ synchronous = true, source_index = 0 : i64 } : (tensor<3xf32>) -> tensor<3xf32> + %0 = ccl.recv %arg0 { synchronous = true, source_index = 0 : i64 } : (tensor<3xf32>) -> tensor<3xf32> return %0 : tensor<3xf32> } // CHECK-LABEL: func.func @recv( @@ -66,7 +66,7 @@ func.func @recv(%arg0: tensor<3xf32>) -> tensor<3xf32> { func.func @recv_dynamic(%arg0: tensor<3xf32>) -> tensor<3xf32> { %target_index = arith.constant 0 : i64 - %0 = "ccl.recv"(%arg0, %target_index) { synchronous = true } : (tensor<3xf32>, i64) -> tensor<3xf32> + %0 = ccl.recv %arg0, %target_index { synchronous = true } : (tensor<3xf32>, i64) -> tensor<3xf32> return %0 : tensor<3xf32> } @@ -80,7 +80,7 @@ func.func @recv_dynamic(%arg0: tensor<3xf32>) -> tensor<3xf32> { // ----- func.func @all_gather_0(%arg0: tensor<4x4xf32>) -> tensor<8x4xf32> { - %0 = "ccl.all_gather"(%arg0) { replica_groups = [[0, 1] ,[2, 3]], axis = 0 : i64 , synchronous = true }: (tensor<4x4xf32>) -> tensor<8x4xf32> + %0 = ccl.all_gather %arg0 { replica_groups = [[0, 1] ,[2, 3]], axis = 0 : i64 , synchronous = true }: (tensor<4x4xf32>) -> tensor<8x4xf32> return %0 
: tensor<8x4xf32> } // CHECK-LABEL: func.func @all_gather_0( @@ -93,7 +93,7 @@ func.func @all_gather_0(%arg0: tensor<4x4xf32>) -> tensor<8x4xf32> { // ----- func.func @all_gather_1(%arg0: tensor<4x4xf32>) -> tensor<4x8xf32> { - %0 = "ccl.all_gather"(%arg0) { replica_groups = [[0, 1] ,[2, 3]], axis = 1 : i64 , synchronous = true }: (tensor<4x4xf32>) -> tensor<4x8xf32> + %0 = ccl.all_gather %arg0 { replica_groups = [[0, 1] ,[2, 3]], axis = 1 : i64 , synchronous = true }: (tensor<4x4xf32>) -> tensor<4x8xf32> return %0 : tensor<4x8xf32> } // CHECK-LABEL: func.func @all_gather_1( @@ -106,7 +106,7 @@ func.func @all_gather_1(%arg0: tensor<4x4xf32>) -> tensor<4x8xf32> { // ----- func.func @all_gather_dynamic_0(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xindex>) -> tensor<8x4xf32> { - %0 = "ccl.all_gather"(%arg0, %arg1) {axis=0 : i64, synchronous=true}: (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<8x4xf32> + %0 = ccl.all_gather %arg0, %arg1 {axis=0 : i64, synchronous=true}: (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<8x4xf32> return %0 : tensor<8x4xf32> } // CHECK-LABEL: func.func @all_gather_dynamic_0( @@ -120,7 +120,7 @@ func.func @all_gather_dynamic_0(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xindex> // ----- func.func @all_gather_dynamic_1(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xindex>) -> tensor<4x8xf32> { - %0 = "ccl.all_gather"(%arg0, %arg1) {axis=1 : i64, synchronous=true}: (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<4x8xf32> + %0 = ccl.all_gather %arg0, %arg1 {axis=1 : i64, synchronous=true}: (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<4x8xf32> return %0 : tensor<4x8xf32> } // CHECK-LABEL: func.func @all_gather_dynamic_1( @@ -134,7 +134,7 @@ func.func @all_gather_dynamic_1(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xindex> // ----- func.func @all_reduce(%arg0: tensor<4xf32>) -> tensor<4xf32> { - %0 = "ccl.all_reduce"(%arg0) {reduction = "sum", synchronous=true, replica_groups = [[0, 1] ,[2, 3]]}: (tensor<4xf32>) -> tensor<4xf32> + %0 = ccl.all_reduce %arg0 
{reduction = "sum", synchronous=true, replica_groups = [[0, 1] ,[2, 3]]}: (tensor<4xf32>) -> tensor<4xf32> return %0 : tensor<4xf32> } // CHECK-LABEL: func.func @all_reduce( @@ -147,7 +147,7 @@ func.func @all_reduce(%arg0: tensor<4xf32>) -> tensor<4xf32> { // ----- func.func @all_reduce_dynamic(%arg0: tensor<4xf32>, %arg1:tensor<1x4xi64>) -> tensor<4xf32> { - %0 = "ccl.all_reduce"(%arg0, %arg1) {reduction = "sum", synchronous=true}: (tensor<4xf32>, tensor<1x4xi64>) -> tensor<4xf32> + %0 = ccl.all_reduce %arg0, %arg1 {reduction = "sum", synchronous=true}: (tensor<4xf32>, tensor<1x4xi64>) -> tensor<4xf32> return %0 : tensor<4xf32> } // CHECK-LABEL: func.func @all_reduce_dynamic( @@ -161,7 +161,7 @@ func.func @all_reduce_dynamic(%arg0: tensor<4xf32>, %arg1:tensor<1x4xi64>) -> te // ----- func.func @reduce_scatter_0(%arg0: tensor<4x4xf32>) -> tensor<1x4xf32> { - %0 = "ccl.reduce_scatter"(%arg0) { reduction="sum", replica_groups = [[0, 1, 2, 3]], axis = 0 : i64 , synchronous=true } : (tensor<4x4xf32>) -> tensor<1x4xf32> + %0 = ccl.reduce_scatter %arg0 { reduction="sum", replica_groups = [[0, 1, 2, 3]], axis = 0 : i64 , synchronous=true } : (tensor<4x4xf32>) -> tensor<1x4xf32> return %0 : tensor<1x4xf32> } @@ -175,7 +175,7 @@ func.func @reduce_scatter_0(%arg0: tensor<4x4xf32>) -> tensor<1x4xf32> { // ----- func.func @reduce_scatter_1(%arg0: tensor<4x4xf32>) -> tensor<4x1xf32> { - %0 = "ccl.reduce_scatter"(%arg0) { reduction="sum", replica_groups = [[0, 1, 2, 3]], axis = 1 : i64 , synchronous=true } : (tensor<4x4xf32>) -> tensor<4x1xf32> + %0 = ccl.reduce_scatter %arg0 { reduction="sum", replica_groups = [[0, 1, 2, 3]], axis = 1 : i64 , synchronous=true } : (tensor<4x4xf32>) -> tensor<4x1xf32> return %0 : tensor<4x1xf32> } @@ -189,7 +189,7 @@ func.func @reduce_scatter_1(%arg0: tensor<4x4xf32>) -> tensor<4x1xf32> { // ----- func.func @reduce_scatter_dynamic_0(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xindex>) -> tensor<2x4xf32> { - %0 = "ccl.reduce_scatter"(%arg0, %arg1) { 
axis = 0 : i64, synchronous = true, reduction = "sum" }: (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<2x4xf32> + %0 = ccl.reduce_scatter %arg0, %arg1 { axis = 0 : i64, synchronous = true, reduction = "sum" }: (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<2x4xf32> return %0 : tensor<2x4xf32> } // CHECK-LABEL: func.func @reduce_scatter_dynamic_0( @@ -203,7 +203,7 @@ func.func @reduce_scatter_dynamic_0(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xin // ----- func.func @reduce_scatter_dynamic_1(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xindex>) -> tensor<4x2xf32> { - %0 = "ccl.reduce_scatter"(%arg0, %arg1) { axis=1 : i64, synchronous=true, reduction= "sum" } : (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<4x2xf32> + %0 = ccl.reduce_scatter %arg0, %arg1 { axis=1 : i64, synchronous=true, reduction= "sum" } : (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<4x2xf32> return %0 : tensor<4x2xf32> } // CHECK-LABEL: func.func @reduce_scatter_dynamic_1( From 3ed022bc53cfb7371f24550346c88ba256b366fa Mon Sep 17 00:00:00 2001 From: linuxlonelyeagle Date: Mon, 25 Mar 2024 17:47:29 +0800 Subject: [PATCH 2/2] fix nit. --- compiler/include/byteir/Dialect/Lccl/LcclOps.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/include/byteir/Dialect/Lccl/LcclOps.td b/compiler/include/byteir/Dialect/Lccl/LcclOps.td index 3219bda55..b0278fe98 100644 --- a/compiler/include/byteir/Dialect/Lccl/LcclOps.td +++ b/compiler/include/byteir/Dialect/Lccl/LcclOps.td @@ -78,7 +78,7 @@ def Lccl_SendOp : Lccl_Op<"send"> { let arguments = (ins Arg:$src, - Arg, "", [MemRead]>:$dynamic_target_index, + Optional :$dynamic_target_index, BoolAttr:$synchronous, OptionalAttr:$target_index ); @@ -94,7 +94,7 @@ def Lccl_RecvOp : Lccl_Op<"recv"> { let arguments = (ins Arg:$src, - Arg, "", [MemRead]>:$dynamic_source_index, + Optional :$dynamic_source_index, BoolAttr:$synchronous, OptionalAttr:$source_index );