From d8a46714aefa9d97c5d2d14a208cfa9f9a6d18ba Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <lonelyeagle.02@bytedance.com>
Date: Mon, 25 Mar 2024 15:51:33 +0800
Subject: [PATCH 1/2] add lccl op side-effect.

---
 .../include/byteir/Dialect/Lccl/LcclOps.td    | 31 +++++++++---------
 compiler/test/Dialect/Ccl/ccl_bufferize.mlir  | 32 +++++++++----------
 2 files changed, 32 insertions(+), 31 deletions(-)
diff --git a/compiler/include/byteir/Dialect/Lccl/LcclOps.td b/compiler/include/byteir/Dialect/Lccl/LcclOps.td
index 004b120a2..3219bda55 100644
--- a/compiler/include/byteir/Dialect/Lccl/LcclOps.td
+++ b/compiler/include/byteir/Dialect/Lccl/LcclOps.td
@@ -20,6 +20,7 @@
 #define BYTEIR_DIALECT_LCCL_LCCL_OPS
 
 include "byteir/Dialect/Lccl/LcclBase.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
 
 //===----------------------------------------------------------------------===//
 // Lccl Dialect operations.
@@ -59,8 +60,8 @@ def Lccl_BroadcastOp : Lccl_ReplicaGroupsOp<"broadcast"> {
   }];
 
   let arguments = (ins
-    AnyMemRef:$src,
-    Optional<AnyMemRef>:$dynamic_replica_groups,
+    Arg<AnyMemRef, "", [MemWrite, MemRead]>:$src,
+    Arg<Optional<AnyMemRef>, "", [MemRead]>:$dynamic_replica_groups,
     BoolAttr:$synchronous,
     OptionalAttr<IndexListArrayAttr>:$replica_groups,
     OptionalAttr<I64Attr>:$unique_id
@@ -76,8 +77,8 @@ def Lccl_SendOp : Lccl_Op<"send"> {
   }];
 
   let arguments = (ins
-    AnyMemRef:$src,
-    Optional<I64>:$dynamic_target_index,
+    Arg<AnyMemRef, "", [MemRead]>:$src,
+    Arg<Optional<I64>, "", [MemRead]>:$dynamic_target_index,
     BoolAttr:$synchronous,
     OptionalAttr<I64Attr>:$target_index
   );
@@ -92,8 +93,8 @@ def Lccl_RecvOp : Lccl_Op<"recv"> {
   }];
 
   let arguments = (ins
-    AnyMemRef:$src,
-    Optional<I64>:$dynamic_source_index,
+    Arg<AnyMemRef, "", [MemWrite]>:$src,
+    Arg<Optional<I64>, "", [MemRead]>:$dynamic_source_index,
     BoolAttr:$synchronous,
     OptionalAttr<I64Attr>:$source_index
   );
@@ -115,9 +116,9 @@ def Lccl_AllReduceOp : Lccl_ReplicaGroupsOp<"all_reduce"> {
   }];
 
   let arguments = (ins
-    AnyMemRef:$src,
-    AnyMemRef:$target,
-    Optional<AnyMemRef>:$dynamic_replica_groups,
+    Arg<AnyMemRef, "", [MemRead]>:$src,
+    Arg<AnyMemRef, "", [MemWrite]>:$target,
+    Arg<Optional<AnyMemRef>, "", [MemRead]>:$dynamic_replica_groups,
     BoolAttr:$synchronous,
     StrAttr:$reduction,
     OptionalAttr<IndexListArrayAttr>:$replica_groups,
@@ -139,9 +140,9 @@ def Lccl_AllGatherOp : Lccl_ReplicaGroupsOp<"all_gather"> {
   }];
 
   let arguments = (ins
-    AnyMemRef:$src,
-    AnyMemRef:$target,
-    Optional<AnyMemRef>:$dynamic_replica_groups,
+    Arg<AnyMemRef, "", [MemRead]>:$src,
+    Arg<AnyMemRef, "", [MemWrite]>:$target,
+    Arg<Optional<AnyMemRef>, "", [MemRead]>:$dynamic_replica_groups,
     BoolAttr:$synchronous,
     I64Attr:$axis,
     OptionalAttr<IndexListArrayAttr>:$replica_groups,
@@ -162,9 +163,9 @@ def Lccl_ReduceScatterOp : Lccl_ReplicaGroupsOp<"reduce_scatter"> {
   }];
 
   let arguments = (ins
-    AnyMemRef:$src,
-    AnyMemRef:$target,
-    Optional<AnyMemRef>:$dynamic_replica_groups,
+    Arg<AnyMemRef, "", [MemRead]>:$src,
+    Arg<AnyMemRef, "", [MemWrite]>:$target,
+    Arg<Optional<AnyMemRef>, "", [MemRead]>:$dynamic_replica_groups,
     BoolAttr:$synchronous,
     StrAttr:$reduction,
     I64Attr:$axis,
diff --git a/compiler/test/Dialect/Ccl/ccl_bufferize.mlir b/compiler/test/Dialect/Ccl/ccl_bufferize.mlir
index 1afd3c61b..f9c064ca2 100644
--- a/compiler/test/Dialect/Ccl/ccl_bufferize.mlir
+++ b/compiler/test/Dialect/Ccl/ccl_bufferize.mlir
@@ -1,7 +1,7 @@
 // RUN: byteir-opt %s  -byteir-one-shot-bufferize -split-input-file | FileCheck %s
 
 func.func @broadcast(%arg0: tensor<2x3x8xf32>) -> tensor<2x3x8xf32> {
-  %0 = "ccl.broadcast"(%arg0) {replica_groups = [[2, 3]], synchronous = true} : (tensor<2x3x8xf32>) -> tensor<2x3x8xf32>   
+  %0 = ccl.broadcast %arg0 {replica_groups = [[2, 3]], synchronous = true} : (tensor<2x3x8xf32>) -> tensor<2x3x8xf32>   
   return %0 : tensor<2x3x8xf32>
 }
 
@@ -14,7 +14,7 @@ func.func @broadcast(%arg0: tensor<2x3x8xf32>) -> tensor<2x3x8xf32> {
 // -----
 
 func.func @broadcast_dynamic(%arg0: tensor<2x3x8xf32>, %arg1: tensor<1x4xindex>) -> tensor<2x3x8xf32> {
-  %0 = "ccl.broadcast"(%arg0, %arg1) {synchronous = true} : (tensor<2x3x8xf32>, tensor<1x4xindex>) -> tensor<2x3x8xf32>   
+  %0 = ccl.broadcast %arg0, %arg1 {synchronous = true} : (tensor<2x3x8xf32>, tensor<1x4xindex>) -> tensor<2x3x8xf32>   
   return %0 : tensor<2x3x8xf32>
 }
 // CHECK-LABEL:   func.func @broadcast_dynamic(
@@ -27,7 +27,7 @@ func.func @broadcast_dynamic(%arg0: tensor<2x3x8xf32>, %arg1: tensor<1x4xindex>)
 // -----
 
 func.func @send(%arg0: tensor<3xf32>) -> tensor<3xf32> {
-  %0 = "ccl.send"(%arg0){ synchronous = true, target_index = 0 : i64 }: (tensor<3xf32>) -> tensor<3xf32>
+  %0 = ccl.send %arg0 { synchronous = true, target_index = 0 : i64 }: (tensor<3xf32>) -> tensor<3xf32>
   return %0 : tensor<3xf32>
 }
 // CHECK-LABEL:   func.func @send(
@@ -40,7 +40,7 @@ func.func @send(%arg0: tensor<3xf32>) -> tensor<3xf32> {
 
 func.func @send_dynamic(%arg0: tensor<3xf32>) -> tensor<3xf32> {
   %target_index = arith.constant 0 : i64
-  %0 = "ccl.send"(%arg0, %target_index) { synchronous = true } : (tensor<3xf32>, i64) -> tensor<3xf32>
+  %0 = ccl.send %arg0, %target_index { synchronous = true } : (tensor<3xf32>, i64) -> tensor<3xf32>
   return %0 : tensor<3xf32>
 }
 // CHECK-LABEL:   func.func @send_dynamic(
@@ -53,7 +53,7 @@ func.func @send_dynamic(%arg0: tensor<3xf32>) -> tensor<3xf32> {
 // -----
 
 func.func @recv(%arg0: tensor<3xf32>) -> tensor<3xf32> {
-  %0 = "ccl.recv"(%arg0){ synchronous = true, source_index = 0 : i64 } : (tensor<3xf32>) -> tensor<3xf32>
+  %0 = ccl.recv %arg0 { synchronous = true, source_index = 0 : i64 } : (tensor<3xf32>) -> tensor<3xf32>
   return %0 : tensor<3xf32>
 }
 // CHECK-LABEL:   func.func @recv(
@@ -66,7 +66,7 @@ func.func @recv(%arg0: tensor<3xf32>) -> tensor<3xf32> {
 
 func.func @recv_dynamic(%arg0: tensor<3xf32>) -> tensor<3xf32> {
     %target_index = arith.constant 0 : i64
-    %0 = "ccl.recv"(%arg0, %target_index) { synchronous = true } : (tensor<3xf32>, i64) -> tensor<3xf32>
+    %0 = ccl.recv %arg0, %target_index { synchronous = true } : (tensor<3xf32>, i64) -> tensor<3xf32>
     return %0 : tensor<3xf32>
 }
 
@@ -80,7 +80,7 @@ func.func @recv_dynamic(%arg0: tensor<3xf32>) -> tensor<3xf32> {
 // -----
 
 func.func @all_gather_0(%arg0: tensor<4x4xf32>) -> tensor<8x4xf32> {
-    %0 = "ccl.all_gather"(%arg0) { replica_groups = [[0, 1] ,[2, 3]], axis = 0 : i64 , synchronous = true }: (tensor<4x4xf32>) -> tensor<8x4xf32>
+    %0 = ccl.all_gather %arg0 { replica_groups = [[0, 1] ,[2, 3]], axis = 0 : i64 , synchronous = true }: (tensor<4x4xf32>) -> tensor<8x4xf32>
     return %0 : tensor<8x4xf32>
 }
 // CHECK-LABEL:   func.func @all_gather_0(
@@ -93,7 +93,7 @@ func.func @all_gather_0(%arg0: tensor<4x4xf32>) -> tensor<8x4xf32> {
 // -----
 
 func.func @all_gather_1(%arg0: tensor<4x4xf32>) -> tensor<4x8xf32> {
-    %0 = "ccl.all_gather"(%arg0) { replica_groups = [[0, 1] ,[2, 3]], axis = 1 : i64 , synchronous = true }: (tensor<4x4xf32>) -> tensor<4x8xf32>
+    %0 = ccl.all_gather %arg0 { replica_groups = [[0, 1] ,[2, 3]], axis = 1 : i64 , synchronous = true }: (tensor<4x4xf32>) -> tensor<4x8xf32>
     return %0 : tensor<4x8xf32>
 }
 // CHECK-LABEL:   func.func @all_gather_1(
@@ -106,7 +106,7 @@ func.func @all_gather_1(%arg0: tensor<4x4xf32>) -> tensor<4x8xf32> {
 // -----
 
 func.func @all_gather_dynamic_0(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xindex>) -> tensor<8x4xf32> {
-    %0 = "ccl.all_gather"(%arg0, %arg1) {axis=0 : i64, synchronous=true}: (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<8x4xf32>
+    %0 = ccl.all_gather %arg0, %arg1 {axis=0 : i64, synchronous=true}: (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<8x4xf32>
     return %0 : tensor<8x4xf32>
 }
 // CHECK-LABEL:   func.func @all_gather_dynamic_0(
@@ -120,7 +120,7 @@ func.func @all_gather_dynamic_0(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xindex>
 // -----
 
 func.func @all_gather_dynamic_1(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xindex>) -> tensor<4x8xf32> {
-    %0 = "ccl.all_gather"(%arg0, %arg1) {axis=1 : i64, synchronous=true}: (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<4x8xf32>
+    %0 = ccl.all_gather %arg0, %arg1 {axis=1 : i64, synchronous=true}: (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<4x8xf32>
     return %0 : tensor<4x8xf32>
 }
 // CHECK-LABEL:   func.func @all_gather_dynamic_1(
@@ -134,7 +134,7 @@ func.func @all_gather_dynamic_1(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xindex>
 // -----
 
 func.func @all_reduce(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-    %0 = "ccl.all_reduce"(%arg0) {reduction = "sum", synchronous=true, replica_groups = [[0, 1] ,[2, 3]]}: (tensor<4xf32>) -> tensor<4xf32>
+    %0 = ccl.all_reduce %arg0 {reduction = "sum", synchronous=true, replica_groups = [[0, 1] ,[2, 3]]}: (tensor<4xf32>) -> tensor<4xf32>
     return %0 : tensor<4xf32>
 }
 // CHECK-LABEL:   func.func @all_reduce(
@@ -147,7 +147,7 @@ func.func @all_reduce(%arg0: tensor<4xf32>) -> tensor<4xf32> {
 // -----
 
 func.func @all_reduce_dynamic(%arg0: tensor<4xf32>, %arg1:tensor<1x4xi64>) -> tensor<4xf32> {
-    %0 = "ccl.all_reduce"(%arg0, %arg1) {reduction = "sum", synchronous=true}: (tensor<4xf32>, tensor<1x4xi64>) -> tensor<4xf32>
+    %0 = ccl.all_reduce %arg0, %arg1 {reduction = "sum", synchronous=true}: (tensor<4xf32>, tensor<1x4xi64>) -> tensor<4xf32>
     return %0 : tensor<4xf32>
 }
 // CHECK-LABEL:   func.func @all_reduce_dynamic(
@@ -161,7 +161,7 @@ func.func @all_reduce_dynamic(%arg0: tensor<4xf32>, %arg1:tensor<1x4xi64>) -> te
 // -----
 
 func.func @reduce_scatter_0(%arg0: tensor<4x4xf32>) -> tensor<1x4xf32> {
-    %0 = "ccl.reduce_scatter"(%arg0) { reduction="sum", replica_groups = [[0, 1, 2, 3]], axis = 0 : i64 , synchronous=true } : (tensor<4x4xf32>) -> tensor<1x4xf32>
+    %0 = ccl.reduce_scatter %arg0 { reduction="sum", replica_groups = [[0, 1, 2, 3]], axis = 0 : i64 , synchronous=true } : (tensor<4x4xf32>) -> tensor<1x4xf32>
     return %0 : tensor<1x4xf32>
 }
 
@@ -175,7 +175,7 @@ func.func @reduce_scatter_0(%arg0: tensor<4x4xf32>) -> tensor<1x4xf32> {
 // -----
 
 func.func @reduce_scatter_1(%arg0: tensor<4x4xf32>) -> tensor<4x1xf32> {
-    %0 = "ccl.reduce_scatter"(%arg0) { reduction="sum", replica_groups = [[0, 1, 2, 3]], axis = 1 : i64 , synchronous=true } : (tensor<4x4xf32>) -> tensor<4x1xf32>
+    %0 = ccl.reduce_scatter %arg0 { reduction="sum", replica_groups = [[0, 1, 2, 3]], axis = 1 : i64 , synchronous=true } : (tensor<4x4xf32>) -> tensor<4x1xf32>
     return %0 : tensor<4x1xf32>
 }
 
@@ -189,7 +189,7 @@ func.func @reduce_scatter_1(%arg0: tensor<4x4xf32>) -> tensor<4x1xf32> {
 // -----
 
 func.func @reduce_scatter_dynamic_0(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xindex>) -> tensor<2x4xf32> {
-    %0 = "ccl.reduce_scatter"(%arg0, %arg1) { axis = 0 : i64, synchronous = true, reduction = "sum" }: (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<2x4xf32>
+    %0 = ccl.reduce_scatter %arg0, %arg1 { axis = 0 : i64, synchronous = true, reduction = "sum" }: (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<2x4xf32>
     return %0 : tensor<2x4xf32>
 }
 // CHECK-LABEL:   func.func @reduce_scatter_dynamic_0(
@@ -203,7 +203,7 @@ func.func @reduce_scatter_dynamic_0(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xin
 // -----
 
 func.func @reduce_scatter_dynamic_1(%arg0: tensor<4x4xf32>, %arg1: tensor<2x2xindex>) -> tensor<4x2xf32> {
-    %0 = "ccl.reduce_scatter"(%arg0, %arg1) { axis=1 : i64, synchronous=true, reduction= "sum" } : (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<4x2xf32>
+    %0 = ccl.reduce_scatter %arg0, %arg1 { axis=1 : i64, synchronous=true, reduction= "sum" } : (tensor<4x4xf32>, tensor<2x2xindex>) -> tensor<4x2xf32>
     return %0 : tensor<4x2xf32>
 }
 // CHECK-LABEL:   func.func @reduce_scatter_dynamic_1(

From 3ed022bc53cfb7371f24550346c88ba256b366fa Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <lonelyeagle.02@bytedance.com>
Date: Mon, 25 Mar 2024 17:47:29 +0800
Subject: [PATCH 2/2] fix nit.

---
 compiler/include/byteir/Dialect/Lccl/LcclOps.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler/include/byteir/Dialect/Lccl/LcclOps.td b/compiler/include/byteir/Dialect/Lccl/LcclOps.td
index 3219bda55..b0278fe98 100644
--- a/compiler/include/byteir/Dialect/Lccl/LcclOps.td
+++ b/compiler/include/byteir/Dialect/Lccl/LcclOps.td
@@ -78,7 +78,7 @@ def Lccl_SendOp : Lccl_Op<"send"> {
 
   let arguments = (ins
     Arg<AnyMemRef, "", [MemRead]>:$src,
-    Arg<Optional<I64>, "", [MemRead]>:$dynamic_target_index,
+    Optional<I64> :$dynamic_target_index,
     BoolAttr:$synchronous,
     OptionalAttr<I64Attr>:$target_index
   );
@@ -94,7 +94,7 @@ def Lccl_RecvOp : Lccl_Op<"recv"> {
 
   let arguments = (ins
     Arg<AnyMemRef, "", [MemWrite]>:$src,
-    Arg<Optional<I64>, "", [MemRead]>:$dynamic_source_index,
+    Optional<I64> :$dynamic_source_index,
     BoolAttr:$synchronous,
     OptionalAttr<I64Attr>:$source_index
   );