add cumprod_grad composite #64432

Merged · 11 commits · May 24, 2024
Changes from 9 commits
1 change: 1 addition & 0 deletions paddle/fluid/prim/api/api.yaml
@@ -29,6 +29,7 @@
- maximum
- minimum
- prod
- cumprod
- roll
- scatter
- scatter_nd_add
Contributor:
The implementation here seems to differ somewhat from the one in details.h; could you double-check?

Contributor Author:
Done

@@ -1071,6 +1071,47 @@ void gather_nd_grad(const Tensor& x,
}
}

template <typename T>
void cumprod_grad(const Tensor& x,
                  const Tensor& out,
                  const Tensor& out_grad,
                  int dim,
                  bool exclusive,
                  bool reverse,
                  Tensor* x_grad) {
  if (x_grad) {
    // dx = cumsum(out * out_grad, dim, false, exclusive, !reverse) / x
    std::vector<int64_t> x_dim = common::vectorize<int64_t>(x.dims());
    auto zero_tensor = full<T>(x_dim, 0.0, x.dtype());
    auto zero_mask = cast<T>(equal<T>(x, zero_tensor), x.dtype());
    // determine the index of first zero
    auto zero_mask_cumsum_inclusive =
        cumsum<T>(zero_mask, dim, false, false, reverse);
    auto zero_mask_cumsum_exclusive =
        cumsum<T>(zero_mask, dim, false, true, reverse);
    auto zero_mask_cumsum =
        zero_mask_cumsum_inclusive + zero_mask_cumsum_exclusive;
Contributor (@HydrogenSulfate, May 22, 2024):
Isn't inclusive + exclusive equivalent to twice the exclusive cumsum plus the mask itself? Two cumsum calls should take longer than one cumsum plus a scale and an add, so could this be rewritten in the latter form?
zero_mask_cumsum_inclusive + zero_mask_cumsum_exclusive --> scale<T>(zero_mask_cumsum_exclusive, 2) + zero_mask

Contributor Author:
Done

    auto ones_tensor = full<T>(x_dim, 1.0, x.dtype());
    auto first_zero_mask =
        cast<T>(equal<T>(zero_mask_cumsum, ones_tensor), x.dtype());
    // compute the grad for position with value not equal to 0
    auto common_dx = cumsum<T>(out * out_grad, dim, false, exclusive, !reverse);
    // fill the positions of 0 with 1.
    auto replace_one = (1 - zero_mask) * x + zero_mask;
    // fill the first positions of 0 with 1.
    auto replace_first_one = (1 - first_zero_mask) * x + first_zero_mask;
    // recompute the grad of the first position with 0
    auto cumprod_recompute =
        cumprod<T>(replace_first_one, dim, exclusive, reverse);
    auto zeros_dx = cumsum<T>(
        cumprod_recompute * out_grad, dim, false, exclusive, !reverse);
    auto x_grad_res =
        ((1 - first_zero_mask) * common_dx + first_zero_mask * zeros_dx) /
        replace_one;
    set_output<T>(x_grad_res, x_grad);
  }
}
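The identity behind the review suggestion above can be checked on its own: an inclusive cumulative sum equals the exclusive one plus the element itself, so inclusive + exclusive collapses to 2 * exclusive + mask and one of the two cumsum calls can be dropped. A minimal standalone NumPy sketch of that identity (illustrative only, not Paddle API; shapes and names are arbitrary):

import numpy as np

# 0/1 zero-mask over some axis, standing in for zero_mask in the diff above.
mask = (np.random.rand(2, 5) < 0.3).astype(np.float32)
axis = 1

inclusive = np.cumsum(mask, axis=axis)  # includes the current element
exclusive = inclusive - mask            # exclusive cumsum: everything before it

two_cumsums = inclusive + exclusive     # formulation currently in the diff
one_cumsum = 2.0 * exclusive + mask     # suggested rewrite: one cumsum + scale + add

assert np.allclose(two_cumsums, one_cumsum)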

template <typename T>
void prod_grad(const Tensor& x,
               const Tensor& out,
41 changes: 41 additions & 0 deletions paddle/fluid/primitive/rule/vjp/details.h
@@ -59,6 +59,47 @@ void cumsum_grad(const Tensor& x,
}
}

template <typename T>
void cumprod_grad(const Tensor& x,
                  const Tensor& out,
                  const Tensor& out_grad,
                  int dim,
                  bool exclusive,
                  bool reverse,
                  Tensor* x_grad) {
  if (x_grad) {
    // dx = cumsum(out * out_grad, dim, false, exclusive, !reverse) / x
    std::vector<int64_t> x_dim = common::vectorize<int64_t>(x.dims());
    auto zero_tensor = full<T>(x_dim, 0.0, x.dtype());
    auto zero_mask = cast<T>(equal<T>(x, zero_tensor), x.dtype());
    // determine the index of first zero
    auto zero_mask_cumsum_inclusive =
        cumsum<T>(zero_mask, dim, false, false, reverse);
    auto zero_mask_cumsum_exclusive =
        cumsum<T>(zero_mask, dim, false, true, reverse);
    auto zero_mask_cumsum =
        zero_mask_cumsum_inclusive + zero_mask_cumsum_exclusive;
    auto ones_tensor = full<T>(x_dim, 1.0, x.dtype());
    auto first_zero_mask =
        cast<T>(equal<T>(zero_mask_cumsum, ones_tensor), x.dtype());
    // compute the grad for position with value not equal to 0
    auto common_dx = cumsum<T>(out * out_grad, dim, false, exclusive, !reverse);
    // fill the positions of 0 with 1.
    auto replace_one = (1 - zero_mask) * x + zero_mask;
    // fill the first positions of 0 with 1.
    auto replace_first_one = (1 - first_zero_mask) * x + first_zero_mask;
    // recompute the grad of the first position with 0
    auto cumprod_recompute =
        cumprod<T>(replace_first_one, dim, exclusive, reverse);
    auto zeros_dx = cumsum<T>(
        cumprod_recompute * out_grad, dim, false, exclusive, !reverse);
    auto x_grad_res =
        ((1 - first_zero_mask) * common_dx + first_zero_mask * zeros_dx) /
        replace_one;
    set_output<T>(x_grad_res, x_grad);
  }
}
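The leading comment in both copies states the composite rule as dx = cumsum(out * out_grad, dim, false, exclusive, !reverse) / x, i.e. a reversed cumulative sum of out * out_grad divided elementwise by x. A minimal NumPy sketch of that rule, assuming a 1-D zero-free input, an inclusive (non-exclusive) forward cumprod, and no reversal, checked against the explicit Jacobian:

import numpy as np

rng = np.random.default_rng(0)
n = 6
x = rng.uniform(1.0, 2.0, size=n)   # zero-free, so dividing by x is safe
g = rng.standard_normal(n)          # upstream gradient, i.e. out_grad
out = np.cumprod(x)                 # forward: out[i] = prod_{j <= i} x[j]

# Composite rule: reversed cumulative sum of out * out_grad, divided by x.
dx_composite = np.cumsum((out * g)[::-1])[::-1] / x

# Reference: explicit Jacobian, d out[i] / d x[k] = out[i] / x[k] for i >= k.
jac = np.zeros((n, n))
for i in range(n):
    for k in range(i + 1):
        jac[i, k] = out[i] / x[k]
dx_reference = g @ jac              # dx[k] = sum_i g[i] * jac[i, k]

assert np.allclose(dx_composite, dx_reference)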

template <typename T>
void divide_grad(const Tensor& x,
                 const Tensor& y,
1 change: 1 addition & 0 deletions paddle/phi/ops/yaml/backward.yaml
@@ -572,6 +572,7 @@
    param: [x]
  kernel :
    func : cumprod_grad
  composite: cumprod_grad(x, out, out_grad, dim, exclusive, reverse, x_grad)

- backward_op : cumsum_grad
  forward : cumsum(Tensor x, Scalar axis=-1, bool flatten=false, bool exclusive=false, bool reverse=false) -> Tensor(out)
147 changes: 147 additions & 0 deletions test/prim/prim/vjp/eager/test_comp_eager_cumprod_grad.py
@@ -0,0 +1,147 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random
import unittest

import numpy as np
import parameterized as param

import paddle
from paddle.base import core


@param.parameterized_class(
    ('primal', 'dtype'),
    [
Contributor:
Please add a single-element test: np.array(np.rand(), dtype="float32")

Contributor Author:
Done

        (
            np.random.uniform(1, 5, (50,)),
            np.float32,
        ),
        (
            np.random.rand(10, 10),
            np.float32,
        ),
        (
            np.random.rand(2, 3, 4),
            np.float32,
        ),
        (
            np.random.rand(2, 3, 3, 4),
            np.float32,
        ),
        (
            np.random.rand(2, 3, 3, 4, 5),
            np.float32,
        ),
        (
            np.random.randint(1, 100, (2, 3, 4)),
            np.int64,
        ),
    ],
)
class TestCumprodGradComp(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.primal = cls.primal.astype(cls.dtype)
        cls.zero_nums = [0, 1, 10, int(np.prod(cls.primal.shape))]

    def test_cumprod_grad_comp(self):
        def actual(primal, dim):
            paddle.disable_static()
            core.set_prim_eager_enabled(True)
            x = paddle.to_tensor(primal, dtype='float32', stop_gradient=False)
            x.stop_gradient = False
            y = paddle.cumprod(x, dim=dim)
            x_cotangent = paddle.grad(
                y, x, create_graph=True, retain_graph=True
            )
            return x_cotangent[0]

        def desired(primal, dim):
            paddle.disable_static()
            core.set_prim_eager_enabled(False)
            x = paddle.to_tensor(primal, dtype='float32', stop_gradient=False)
            x.stop_gradient = False
            y = paddle.cumprod(x, dim=dim)
            x_cotangent = paddle.grad(
                y, x, create_graph=False, retain_graph=True
            )
            return x_cotangent[0]

        for zero_num in self.zero_nums:
            shape = self.primal.shape
            x = self.primal.flatten()
            indices = random.sample(range(x.size), zero_num)
            for i in indices:
                x[i] = 0
            x = np.reshape(x, shape)
            for i in range(len(self.primal.shape)):
                np.testing.assert_allclose(
                    actual=actual(x, i),
                    desired=desired(x, i),
                    rtol=1e-6,
                    atol=0,
                )
        core.set_prim_eager_enabled(False)


@param.parameterized_class(
    ('primal', 'dtype'),
    [
        (
            np.random.uniform(1, 5, ()),
            np.float32,
        ),
    ],
)
class TestCumprodGradComp0D(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.primal = cls.primal.astype(cls.dtype)

    def test_cumprod_grad_comp_0d(self):
        def actual(primal, dim):
            paddle.disable_static()
            core.set_prim_eager_enabled(True)
            x = paddle.to_tensor(primal, dtype='float32', stop_gradient=False)
            x.stop_gradient = False
            y = paddle.cumprod(x, dim=dim)
            x_cotangent = paddle.grad(
                y, x, create_graph=True, retain_graph=True
            )
            return x_cotangent[0]

        def desired(primal, dim):
            paddle.disable_static()
            core.set_prim_eager_enabled(False)
            x = paddle.to_tensor(primal, dtype='float32', stop_gradient=False)
            x.stop_gradient = False
            y = paddle.cumprod(x, dim=dim)
            x_cotangent = paddle.grad(
                y, x, create_graph=False, retain_graph=True
            )
            return x_cotangent[0]

        np.testing.assert_allclose(
            actual=actual(self.primal, 0),
            desired=desired(self.primal, 0),
            rtol=1e-6,
            atol=0,
        )
        core.set_prim_eager_enabled(False)


if __name__ == '__main__':
    unittest.main()
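The tests above deliberately overwrite input entries with zero. Plain division by x in the composite rule breaks at those entries, which is what the first_zero_mask / replace_first_one branch handles. A standalone NumPy sketch of that zero handling (same assumptions as before: 1-D input, inclusive cumprod, no reversal; one zero injected), checked against the explicit Jacobian:

import numpy as np

def rev_cumsum(v):
    # reversed cumulative sum: result[k] = sum_{i >= k} v[i]
    return np.cumsum(v[::-1])[::-1]

rng = np.random.default_rng(1)
n = 6
x = rng.uniform(1.0, 2.0, size=n)
x[2] = 0.0                          # inject a zero, as the tests do
g = rng.standard_normal(n)          # upstream gradient, i.e. out_grad
out = np.cumprod(x)

# Reference gradient from the explicit Jacobian:
# d out[i] / d x[k] = prod_{j <= i, j != k} x[j] for i >= k, else 0.
jac = np.zeros((n, n))
for i in range(n):
    for k in range(i + 1):
        jac[i, k] = np.prod(np.delete(x[: i + 1], k))
dx_reference = g @ jac

# Mirror of the composite's zero handling.
zero_mask = (x == 0).astype(x.dtype)
incl = np.cumsum(zero_mask)
excl = incl - zero_mask
first_zero_mask = ((incl + excl) == 1).astype(x.dtype)  # marks only the first zero
replace_one = (1 - zero_mask) * x + zero_mask           # all zeros -> 1, safe division
replace_first_one = (1 - first_zero_mask) * x + first_zero_mask
common_dx = rev_cumsum(out * g)
zeros_dx = rev_cumsum(np.cumprod(replace_first_one) * g)
dx_composite = (
    (1 - first_zero_mask) * common_dx + first_zero_mask * zeros_dx
) / replace_one

assert np.allclose(dx_composite, dx_reference)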
1 change: 1 addition & 0 deletions test/prim/prim/vjp/static/CMakeLists.txt
@@ -16,3 +16,4 @@ set_tests_properties(test_comp_add_grad PROPERTIES TIMEOUT 60)
set_tests_properties(test_comp_sub_grad PROPERTIES TIMEOUT 60)
set_tests_properties(test_comp_add_tanh_grad PROPERTIES TIMEOUT 60)
set_tests_properties(test_comp_sqrt_grad PROPERTIES TIMEOUT 60)
set_tests_properties(test_comp_cumprod_grad PROPERTIES TIMEOUT 150)