PaddlePaddle · JZ-LIANG · May 30, 2024 · May 10, 2024 · May 10, 2024 · May 16, 2024
diff --git a/python/paddle/distributed/communication/stream/broadcast.py b/python/paddle/distributed/communication/stream/broadcast.py
@@ -12,13 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from paddle import framework
+from paddle import _C_ops, framework
 from paddle.base import data_feeder
 from paddle.distributed.communication.group import (
     _get_global_group,
     _get_or_throw_group_rank,
     _warn_cur_rank_not_in_group,
 )
+from paddle.distributed.communication.reduce import _to_inplace_op
+from paddle.framework import in_pir_mode
 
 
 def _broadcast_in_dygraph(
@@ -59,6 +61,11 @@ def _broadcast_in_static_mode(
     helper = framework.LayerHelper(op_type, **locals())
     ring_id = 0 if group is None else group.id
 
+    if in_pir_mode():
+        op_type = _to_inplace_op(op_type)
+        getattr(_C_ops, op_type)(tensor, src_rank_in_group, sync_op, ring_id)
+        return
+
     helper.append_op(
         type=op_type,
         inputs={'X': [tensor]},

diff --git a/test/collective/process_group_nccl_pir.py b/test/collective/process_group_nccl_pir.py
@@ -321,6 +321,66 @@ def test_allreduce_prod_with_0d_input(self):
                         np.multiply(x_np, y_np), y_out
                     )
 
+    def test_broadcast(self):
+        """Broadcast from rank 0 in PIR static mode and check fetched data."""
+        pg = self.pg
+        # Fed as "x"; assumed identical on every rank (shared seed) - confirm.
+        x_np = np.random.random(self.shape).astype(self.dtype)
+        # Fed as "y": the tensor non-zero ranks pass to broadcast.
+        y_np = np.random.random(self.shape).astype(self.dtype)
+        with paddle.pir_utils.IrGuard():
+            main_program = paddle.static.Program()
+            startup_program = paddle.static.Program()
+            with paddle.static.program_guard(main_program, startup_program):
+                x = paddle.static.data(
+                    name="x", shape=self.shape, dtype=self.dtype
+                )
+                y = paddle.static.data(
+                    name="y", shape=self.shape, dtype=self.dtype
+                )
+                broadcast_result = paddle.assign(x)
+                exe = paddle.static.Executor()
+                if pg.rank() == 0:
+                    dist.broadcast(x, 0, sync_op=False)
+                else:
+                    dist.broadcast(y, 0)
+                # Fetch the snapshot too; a symbolic Value carries no data.
+                (x_out, y_out, expected) = exe.run(
+                    main_program,
+                    feed={"x": x_np, "y": y_np},
+                    fetch_list=[x, y, broadcast_result],
+                )
+                received = x_out if pg.rank() == 0 else y_out
+                np.testing.assert_array_equal(expected, received)
+
+    def test_broadcast_with_0d_input(self):
+        """Broadcast a 0-D tensor from rank 0 in PIR static mode."""
+        pg = self.pg
+        # Fed as "x"; assumed identical on every rank (shared seed) - confirm.
+        x_np = np.random.random([]).astype(self.dtype)
+        # Fed as "y": the tensor non-zero ranks pass to broadcast.
+        y_np = np.random.random([]).astype(self.dtype)
+        with paddle.pir_utils.IrGuard():
+            main_program = paddle.static.Program()
+            startup_program = paddle.static.Program()
+            with paddle.static.program_guard(main_program, startup_program):
+                x = paddle.static.data(name="x", shape=[], dtype=self.dtype)
+                y = paddle.static.data(name="y", shape=[], dtype=self.dtype)
+                broadcast_result = paddle.assign(x)
+                exe = paddle.static.Executor()
+                if pg.rank() == 0:
+                    dist.broadcast(x, 0, sync_op=False)
+                else:
+                    dist.broadcast(y, 0)
+                # Fetch the snapshot too; a symbolic Value carries no data.
+                (x_out, y_out, expected) = exe.run(
+                    main_program,
+                    feed={"x": x_np, "y": y_np},
+                    fetch_list=[x, y, broadcast_result],
+                )
+                received = x_out if pg.rank() == 0 else y_out
+                np.testing.assert_array_equal(expected, received)
+
 
 class TestProcessGroupFp16(TestProcessGroupFp32):
     def setUp(self):