[auto parallel] shard tensor stop gradient support (#60699)
FeixLiu authored Jan 10, 2024
1 parent 3611e26 commit f177fa6
Showing 2 changed files with 11 additions and 1 deletion.
python/paddle/distributed/auto_parallel/api.py (5 additions, 1 deletion)
@@ -225,9 +225,13 @@ def _init_func(var, block):
 
             return dist_param
         else:
-            return paddle.Tensor(
+            dist_tensor = paddle.Tensor(
                 tensor, process_mesh=mesh, placements=placements, place=place
             )
+            # InitDistTensorWithTensor won't pass the stop gradient attribute,
+            # have to pass it manually.
+            dist_tensor.stop_gradient = tensor.stop_gradient
+            return dist_tensor
     else:
         # TODO(zhiqiu): we need to refine the static shard_tensor
         sharding_specs = get_shard_spec(mesh, placements, tensor.ndim)
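In effect, the dynamic-mode branch of dist.shard_tensor now mirrors the input tensor's stop_gradient flag instead of leaving the returned dist tensor at the default of True. Below is a minimal usage sketch of the behavior this enables; the 2x2 process mesh, its dim names, and the tensor shape are illustrative choices (not taken from this commit), and the script is assumed to run under a distributed launch with a matching number of ranks.

import paddle
import paddle.distributed as dist
from paddle.distributed import Replicate, Shard

# Illustrative 2x2 mesh; run under a distributed launch with four ranks.
mesh = dist.ProcessMesh([[0, 1], [2, 3]], dim_names=["x", "y"])

x = paddle.ones([4, 1024, 512])
x.stop_gradient = False  # mark the local tensor as trainable

d_x = dist.shard_tensor(x, mesh, [Shard(0), Replicate()])

# Before this commit the returned dist tensor always had stop_gradient=True;
# with the fix it inherits the flag from the input tensor.
assert not d_x.stop_gradient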
test/auto_parallel/test_shard_tensor_api.py (6 additions, 0 deletions)
@@ -92,6 +92,12 @@ def test_dynamic_mode_property_change(self):
 
         self.assertEqual(d_tensor.process_mesh, self.mesh)
 
+    def test_stop_gradient(self):
+        x = paddle.ones([4, 1024, 512])
+        x.stop_gradient = False
+        x = dist.shard_tensor(x, self.mesh, [Shard(0), Replicate()])
+        assert not x.stop_gradient
+
 
 class TestShardTensorStatic(unittest.TestCase):
     def setUp(self):
