[Dy2St] pir dy2st unittest verification - Part 9 #59232

Merged (11 commits) on Nov 29, 2023
25 changes: 12 additions & 13 deletions python/paddle/base/framework.py
@@ -1738,19 +1738,18 @@ def clear_gradient(self):
>>> import numpy as np

>>> x = np.ones([2, 2], np.float32)
>>> with base.dygraph.guard():
... inputs2 = []
... for _ in range(10):
... tmp = base.dygraph.base.to_variable(x)
... tmp.stop_gradient=False
... inputs2.append(tmp)
... ret2 = paddle.add_n(inputs2)
... loss2 = paddle.sum(ret2)
... loss2.retain_grads()
... loss2.backward()
... print(loss2.gradient())
... loss2.clear_gradient()
... print("After clear {}".format(loss2.gradient()))
>>> inputs2 = []
>>> for _ in range(10):
...     tmp = base.dygraph.base.to_variable(x)
...     tmp.stop_gradient = False
...     inputs2.append(tmp)
>>> ret2 = paddle.add_n(inputs2)
>>> loss2 = paddle.sum(ret2)
>>> loss2.retain_grads()
>>> loss2.backward()
>>> print(loss2.gradient())
>>> loss2.clear_gradient()
>>> print("After clear {}".format(loss2.gradient()))
1.0
After clear 0.0
"""
4 changes: 3 additions & 1 deletion python/paddle/jit/dy2static/convert_operators.py
@@ -39,8 +39,10 @@


def convert_attr(x, attr):
Member: The elif doesn't seem necessary at all? It can simply be deleted.

This was presumably here to reconcile dygraph's x.size with the old IR's x.size(), which were inconsistent between dynamic and static graphs. Under PIR the two are unified, so convert_attr and AttributeJstTransformer should both be removable.

I think we can record a TODO(cleanup-legacy-ir) noting that the related convert function and Transformer can be deleted outright once the legacy IR is retired.

Member: Changed directly.

if isinstance(x, (Variable, OpResult)) and attr == "size":
if isinstance(x, Variable) and attr == "size":
return x.size()
elif isinstance(x, OpResult) and attr == "size":
return x.size
else:
return getattr(x, attr)
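
As an aside for readers outside the Paddle codebase, here is a minimal, illustrative sketch of the `size` mismatch discussed in the review thread above; only convert_attr and its module path come from this diff, the tensor and printed values are made up, and the dygraph call goes through the plain getattr fallback:

```python
# Illustrative only: the `size` mismatch that convert_attr bridges.
import paddle
from paddle.jit.dy2static.convert_operators import convert_attr

# In dygraph (and for a PIR OpResult), `size` reads like an attribute.
x = paddle.ones([2, 3])
print(x.size)                   # 6, the number of elements

# Transformed code writes convert_attr(x, "size") instead of x.size:
# it calls x.size() for an old-IR Variable, returns x.size for an
# OpResult, and falls back to getattr for anything else, so the
# dygraph tensor above takes the getattr branch and also prints 6.
print(convert_attr(x, "size"))  # 6
```

Once the legacy IR is retired, both branches collapse to plain attribute access, which is what the TODO(cleanup-legacy-ir) suggestion is about.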

54 changes: 54 additions & 0 deletions python/paddle/pir/math_op_patch.py
@@ -16,6 +16,7 @@
import warnings

from paddle.base.libpaddle import DataType
from paddle.base.wrapped_decorator import wrap_decorator

from . import OpResult

@@ -31,6 +32,21 @@
]


def _fake_interface_only_(func):
Contributor: framework.py already has an identical _fake_interface_only_ function; can that one be used directly?

Member (Author): That would cause a circular import, emmm; it can only be reused once that is untangled.

def __impl__(*args, **kwargs):
raise AssertionError(
f"'{func.__name__}' only can be called by `paddle.Tensor` in dynamic graph mode. Suggestions:\n"
" 1. If you are in static graph mode, you can switch to dynamic graph mode by turning off `paddle.enable_static()` or calling `paddle.disable_static()`.\n"
" 2. If you are using `@paddle.jit.to_static`, you can call `paddle.jit.enable_to_static(False)`. "
f"If you have to translate dynamic graph to static graph, please use other API to replace '{func.__name__}'."
)

return __impl__


fake_interface_only = wrap_decorator(_fake_interface_only_)
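
A minimal sketch of the fake-interface pattern being added here; the simplified decorator and the FakeOpResult class below are hypothetical stand-ins (the real version goes through wrap_decorator), shown only to illustrate the call-time behavior:

```python
# Hypothetical illustration: a fake-interface method fails loudly with
# guidance instead of silently doing nothing in static graph mode.
def fake_interface_only(func):
    def __impl__(*args, **kwargs):
        raise AssertionError(
            f"'{func.__name__}' can only be called by `paddle.Tensor` "
            "in dynamic graph mode."
        )
    return __impl__

class FakeOpResult:  # stand-in for pir.OpResult
    @fake_interface_only
    def clear_gradient(self):
        pass

try:
    FakeOpResult().clear_gradient()
except AssertionError as e:
    print(e)  # tells the user the API is dygraph-only
```

This mirrors the existing fake interfaces on Variable in framework.py; duplicating the decorator here is the workaround for the circular import mentioned in the thread above.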


def create_tensor_with_batchsize(ref_var, value, dtype):
assert isinstance(ref_var, OpResult)
value = float(value)
@@ -356,6 +372,43 @@ def clone(self):
"""
return paddle.assign(self)

@fake_interface_only
def clear_gradient(self):
Member: TODO: once CI passes, ask 震哥 to review the API change.

"""
**Notes**:
**1. This API is ONLY available in Dygraph mode**

**2. Use it only when the OpResult has a gradient; normally we use this for Parameters, since other temporary OpResults will be deleted by Python's GC**

Clear (set to ``0``) the gradient of the current OpResult.

Returns: None

Examples:
.. code-block:: python

>>> import paddle
>>> import paddle.base as base
>>> import numpy as np

>>> x = np.ones([2, 2], np.float32)
>>> inputs2 = []
>>> for _ in range(10):
...     tmp = base.dygraph.base.to_variable(x)
...     tmp.stop_gradient = False
...     inputs2.append(tmp)
>>> ret2 = paddle.add_n(inputs2)
>>> loss2 = paddle.sum(ret2)
>>> loss2.retain_grads()
>>> loss2.backward()
>>> print(loss2.gradient())
>>> loss2.clear_gradient()
>>> print("After clear {}".format(loss2.gradient()))
1.0
After clear 0.0
"""
pass

import paddle

opresult_methods = [
@@ -367,6 +420,7 @@ def clone(self):
('astype', astype),
('size', _size_),
('clone', clone),
('clear_gradient', clear_gradient),
(
'__add__',
_binary_creator_('__add__', paddle.tensor.add, False, _scalar_add_),
121 changes: 60 additions & 61 deletions test/dygraph_to_static/test_mnist.py
@@ -35,8 +35,8 @@

SEED = 2020

if paddle.base.is_compiled_with_cuda():
paddle.base.set_flags({'FLAGS_cudnn_deterministic': True})
if paddle.is_compiled_with_cuda():
paddle.set_flags({'FLAGS_cudnn_deterministic': True})


class SimpleImgConvPool(paddle.nn.Layer):
@@ -135,9 +135,9 @@ def setUp(self):
self.epoch_num = 1
self.batch_size = 64
self.place = (
base.CUDAPlace(0)
if base.is_compiled_with_cuda()
else base.CPUPlace()
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
self.train_reader = paddle.batch(
paddle.dataset.mnist.train(),
Expand Down Expand Up @@ -177,11 +177,11 @@ def test_mnist_to_static(self):

def test_mnist_declarative_cpu_vs_mkldnn(self):
dygraph_loss_cpu = self.train_dygraph()
base.set_flags({'FLAGS_use_mkldnn': True})
paddle.set_flags({'FLAGS_use_mkldnn': True})
try:
dygraph_loss_mkldnn = self.train_dygraph()
finally:
base.set_flags({'FLAGS_use_mkldnn': False})
paddle.set_flags({'FLAGS_use_mkldnn': False})
np.testing.assert_allclose(
dygraph_loss_cpu,
dygraph_loss_mkldnn,
@@ -193,62 +193,61 @@ def test_mnist_declarative_cpu_vs_mkldnn(self):

def train(self, to_static=False):
loss_data = []
with base.dygraph.guard(self.place):
base.default_main_program().random_seed = SEED
base.default_startup_program().random_seed = SEED
mnist = MNIST()
if to_static:
mnist = paddle.jit.to_static(mnist, full_graph=True)
adam = Adam(learning_rate=0.001, parameters=mnist.parameters())

for epoch in range(self.epoch_num):
start = time()
for batch_id, data in enumerate(self.train_reader()):
dy_x_data = np.array(
[x[0].reshape(1, 28, 28) for x in data]
).astype('float32')
y_data = (
np.array([x[1] for x in data])
.astype('int64')
.reshape(-1, 1)
)

img = to_variable(dy_x_data)
label = to_variable(y_data)

label.stop_gradient = True
prediction, acc, avg_loss = mnist(img, label=label)
avg_loss.backward()
base.default_main_program().random_seed = SEED
base.default_startup_program().random_seed = SEED
mnist = MNIST()
if to_static:
mnist = paddle.jit.to_static(mnist, full_graph=True)
adam = Adam(learning_rate=0.001, parameters=mnist.parameters())

for epoch in range(self.epoch_num):
start = time()
for batch_id, data in enumerate(self.train_reader()):
dy_x_data = np.array(
[x[0].reshape(1, 28, 28) for x in data]
).astype('float32')
y_data = (
np.array([x[1] for x in data])
.astype('int64')
.reshape(-1, 1)
)

adam.minimize(avg_loss)
loss_data.append(float(avg_loss))
# save checkpoint
mnist.clear_gradients()
if batch_id % 10 == 0:
print(
"Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}".format(
epoch,
batch_id,
avg_loss.numpy(),
acc.numpy(),
time() - start,
)
)
start = time()
if batch_id == 50:
mnist.eval()
prediction, acc, avg_loss = mnist(img, label)
loss_data.append(float(avg_loss))
# new save load check
self.check_jit_save_load(
mnist,
[dy_x_data],
[img, label],
to_static,
prediction,
[img.name],
img = to_variable(dy_x_data)
Contributor: When we run into legacy APIs like to_variable later on, can they be replaced with paddle.to_tensor?

Member: Yes, some have already been cleaned up; this one was probably just overlooked.
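
For reference, a small sketch of the suggested replacement (the array values are arbitrary, and to_variable's import path follows the docstring example earlier in this PR): both calls build a dygraph Tensor, with paddle.to_tensor being the public API.

```python
# Both lines build a dygraph Tensor from a numpy array; paddle.to_tensor
# is the public replacement for the legacy to_variable helper.
import numpy as np
import paddle
from paddle.base.dygraph.base import to_variable  # legacy helper

x = np.ones([2, 2], dtype=np.float32)
a = to_variable(x)       # legacy style still used in this test
b = paddle.to_tensor(x)  # preferred modern equivalent
print(a.shape, b.shape)  # [2, 2] [2, 2]
```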

label = to_variable(y_data)

label.stop_gradient = True
prediction, acc, avg_loss = mnist(img, label=label)
avg_loss.backward()

adam.minimize(avg_loss)
loss_data.append(float(avg_loss))
# save checkpoint
mnist.clear_gradients()
if batch_id % 10 == 0:
print(
"Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}".format(
epoch,
batch_id,
avg_loss.numpy(),
acc.numpy(),
time() - start,
)
break
)
start = time()
if batch_id == 50:
mnist.eval()
prediction, acc, avg_loss = mnist(img, label)
loss_data.append(float(avg_loss))
# new save load check
self.check_jit_save_load(
mnist,
[dy_x_data],
[img, label],
to_static,
prediction,
[img.name],
)
break
return loss_data

def check_jit_save_load(
19 changes: 9 additions & 10 deletions test/dygraph_to_static/test_tensor_memcpy_on_gpu.py
@@ -21,21 +21,18 @@
import paddle


@paddle.jit.to_static
def tensor_copy_to_cpu(x):
x = paddle.to_tensor(x)
y = x.cpu()
return y


@paddle.jit.to_static
def tensor_copy_to_cuda(x):
x = paddle.to_tensor(x)
y = x.cuda()
return y


@paddle.jit.to_static
def tensor_copy_to_cuda_with_warning(x, device_id=None, blocking=True):
x = paddle.to_tensor(x)
y = x.cuda(device_id, blocking)
@@ -46,7 +43,7 @@ class TestTensorCopyToCpuOnDefaultGPU(Dy2StTestBase):
def _run(self, to_static):
paddle.jit.enable_to_static(to_static)
x1 = paddle.ones([1, 2, 3])
x2 = tensor_copy_to_cpu(x1)
x2 = paddle.jit.to_static(tensor_copy_to_cpu)(x1)
return x1.place, x2.place, x2.numpy()

@test_legacy_and_pir
@@ -73,12 +70,12 @@ class TestTensorCopyToCUDAOnDefaultGPU(Dy2StTestBase):
def _run(self, to_static):
paddle.jit.enable_to_static(to_static)
x1 = paddle.ones([1, 2, 3])
x2 = tensor_copy_to_cuda(x1)
x2 = paddle.jit.to_static(tensor_copy_to_cuda)(x1)
return x1.place, x2.place, x2.numpy()

@test_legacy_and_pir
def test_tensor_cuda_on_default_gpu(self):
if paddle.base.is_compiled_with_cuda():
if paddle.is_compiled_with_cuda():
place = paddle.CUDAPlace(
int(os.environ.get('FLAGS_selected_gpus', 0))
)
@@ -100,7 +97,9 @@ class TestTensorCopyToCUDAWithWarningOnGPU(unittest.TestCase):
def _run(self, to_static):
paddle.jit.enable_to_static(to_static)
x1 = paddle.ones([1, 2, 3])
x2 = tensor_copy_to_cuda_with_warning(x1, device_id=1, blocking=False)
x2 = paddle.jit.to_static(tensor_copy_to_cuda_with_warning)(
x1, device_id=1, blocking=False
)
return x1.place, x2.place, x2.numpy()

def test_with_warning_on_gpu(self):
@@ -114,19 +113,19 @@ def test_with_warning_on_gpu(self):

x1 = paddle.ones([1, 2, 3])
with self.assertWarns(UserWarning, msg="ignored") as cm:
x2 = tensor_copy_to_cuda_with_warning(
x2 = paddle.jit.to_static(tensor_copy_to_cuda_with_warning)(
x1, device_id=1, blocking=True
)
self.assertIn('math_op_patch.py', cm.filename)

with self.assertWarns(UserWarning, msg="ignored") as cm:
x2 = tensor_copy_to_cuda_with_warning(
x2 = paddle.jit.to_static(tensor_copy_to_cuda_with_warning)(
x1, device_id=None, blocking=False
)
self.assertIn('math_op_patch.py', cm.filename)

with self.assertWarns(UserWarning, msg="ignored") as cm:
x2 = tensor_copy_to_cuda_with_warning(
x2 = paddle.jit.to_static(tensor_copy_to_cuda_with_warning)(
x1, device_id=2, blocking=False
)
self.assertIn('math_op_patch.py', cm.filename)