add cumprod_grad composite #64432

Merged · 11 commits · May 24, 2024
Changes from 9 commits
1 change: 1 addition & 0 deletions paddle/fluid/prim/api/api.yaml
@@ -29,6 +29,7 @@
- maximum
- minimum
- prod
- cumprod
- roll
- scatter
- scatter_nd_add
Contributor:
The implementation here seems to differ somewhat from the one in details.h; could you double-check?

Contributor Author:
Done

@@ -1071,6 +1071,47 @@ void gather_nd_grad(const Tensor& x,
}
}

template <typename T>
void cumprod_grad(const Tensor& x,
                  const Tensor& out,
                  const Tensor& out_grad,
                  int dim,
                  bool exclusive,
                  bool reverse,
                  Tensor* x_grad) {
  if (x_grad) {
    // dx = cumsum(out * out_grad, dim, false, exclusive, !reverse) / x
    std::vector<int64_t> x_dim = common::vectorize<int64_t>(x.dims());
    auto zero_tensor = full<T>(x_dim, 0.0, x.dtype());
    auto zero_mask = cast<T>(equal<T>(x, zero_tensor), x.dtype());
    // determine the index of first zero
    auto zero_mask_cumsum_inclusive =
        cumsum<T>(zero_mask, dim, false, false, reverse);
    auto zero_mask_cumsum_exclusive =
        cumsum<T>(zero_mask, dim, false, true, reverse);
    auto zero_mask_cumsum =
        zero_mask_cumsum_inclusive + zero_mask_cumsum_exclusive;
Contributor (@HydrogenSulfate, May 22, 2024):
Isn't inclusive + exclusive equivalent to twice the exclusive cumsum plus the mask itself? Two cumsum calls should take longer than one cumsum plus a scale and an add, so could this be rewritten in the latter form?
zero_mask_cumsum_inclusive + zero_mask_cumsum_exclusive --> scale<T>(zero_mask_cumsum_exclusive, 2) + zero_mask

Contributor Author:
Done

    auto ones_tensor = full<T>(x_dim, 1.0, x.dtype());
    auto first_zero_mask =
        cast<T>(equal<T>(zero_mask_cumsum, ones_tensor), x.dtype());
    // compute the grad for position with value not equal to 0
    auto common_dx = cumsum<T>(out * out_grad, dim, false, exclusive, !reverse);
    // fill the positions of 0 with 1.
    auto replace_one = (1 - zero_mask) * x + zero_mask;
    // fill the first positions of 0 with 1.
    auto replace_first_one = (1 - first_zero_mask) * x + first_zero_mask;
    // recompute the grad of the first position with 0
    auto cumprod_recompute =
        cumprod<T>(replace_first_one, dim, exclusive, reverse);
    auto zeros_dx = cumsum<T>(
        cumprod_recompute * out_grad, dim, false, exclusive, !reverse);
    auto x_grad_res =
        ((1 - first_zero_mask) * common_dx + first_zero_mask * zeros_dx) /
        replace_one;
    set_output<T>(x_grad_res, x_grad);
  }
}
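The identity behind the review suggestion above can be checked on its own: an inclusive cumulative sum equals the exclusive one plus the element itself, so inclusive + exclusive collapses to 2 * exclusive + mask and one of the two cumsum calls can be dropped. A minimal standalone NumPy sketch of that identity (illustrative only, not Paddle API; shapes and names are arbitrary):

import numpy as np

# 0/1 zero-mask over some axis, standing in for zero_mask in the diff above.
mask = (np.random.rand(2, 5) < 0.3).astype(np.float32)
axis = 1

inclusive = np.cumsum(mask, axis=axis)  # includes the current element
exclusive = inclusive - mask            # exclusive cumsum: everything before it

two_cumsums = inclusive + exclusive     # formulation currently in the diff
one_cumsum = 2.0 * exclusive + mask     # suggested rewrite: one cumsum + scale + add

assert np.allclose(two_cumsums, one_cumsum)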

template <typename T>
void prod_grad(const Tensor& x,
               const Tensor& out,
41 changes: 41 additions & 0 deletions paddle/fluid/primitive/rule/vjp/details.h
@@ -59,6 +59,47 @@ void cumsum_grad(const Tensor& x,
}
}

template <typename T>
void cumprod_grad(const Tensor& x,
                  const Tensor& out,
                  const Tensor& out_grad,
                  int dim,
                  bool exclusive,
                  bool reverse,
                  Tensor* x_grad) {
  if (x_grad) {
    // dx = cumsum(out * out_grad, dim, false, exclusive, !reverse) / x
    std::vector<int64_t> x_dim = common::vectorize<int64_t>(x.dims());
    auto zero_tensor = full<T>(x_dim, 0.0, x.dtype());
    auto zero_mask = cast<T>(equal<T>(x, zero_tensor), x.dtype());
    // determine the index of first zero
    auto zero_mask_cumsum_inclusive =
        cumsum<T>(zero_mask, dim, false, false, reverse);
    auto zero_mask_cumsum_exclusive =
        cumsum<T>(zero_mask, dim, false, true, reverse);
    auto zero_mask_cumsum =
        zero_mask_cumsum_inclusive + zero_mask_cumsum_exclusive;
    auto ones_tensor = full<T>(x_dim, 1.0, x.dtype());
    auto first_zero_mask =
        cast<T>(equal<T>(zero_mask_cumsum, ones_tensor), x.dtype());
    // compute the grad for position with value not equal to 0
    auto common_dx = cumsum<T>(out * out_grad, dim, false, exclusive, !reverse);
    // fill the positions of 0 with 1.
    auto replace_one = (1 - zero_mask) * x + zero_mask;
    // fill the first positions of 0 with 1.
    auto replace_first_one = (1 - first_zero_mask) * x + first_zero_mask;
    // recompute the grad of the first position with 0
    auto cumprod_recompute =
        cumprod<T>(replace_first_one, dim, exclusive, reverse);
    auto zeros_dx = cumsum<T>(
        cumprod_recompute * out_grad, dim, false, exclusive, !reverse);
    auto x_grad_res =
        ((1 - first_zero_mask) * common_dx + first_zero_mask * zeros_dx) /
        replace_one;
    set_output<T>(x_grad_res, x_grad);
  }
}
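The leading comment in both copies states the composite rule as dx = cumsum(out * out_grad, dim, false, exclusive, !reverse) / x, i.e. a reversed cumulative sum of out * out_grad divided elementwise by x. A minimal NumPy sketch of that rule, assuming a 1-D zero-free input, an inclusive (non-exclusive) forward cumprod, and no reversal, checked against the explicit Jacobian:

import numpy as np

rng = np.random.default_rng(0)
n = 6
x = rng.uniform(1.0, 2.0, size=n)   # zero-free, so dividing by x is safe
g = rng.standard_normal(n)          # upstream gradient, i.e. out_grad
out = np.cumprod(x)                 # forward: out[i] = prod_{j <= i} x[j]

# Composite rule: reversed cumulative sum of out * out_grad, divided by x.
dx_composite = np.cumsum((out * g)[::-1])[::-1] / x

# Reference: explicit Jacobian, d out[i] / d x[k] = out[i] / x[k] for i >= k.
jac = np.zeros((n, n))
for i in range(n):
    for k in range(i + 1):
        jac[i, k] = out[i] / x[k]
dx_reference = g @ jac              # dx[k] = sum_i g[i] * jac[i, k]

assert np.allclose(dx_composite, dx_reference)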

template <typename T>
void divide_grad(const Tensor& x,
                 const Tensor& y,
1 change: 1 addition & 0 deletions paddle/phi/ops/yaml/backward.yaml
@@ -572,6 +572,7 @@
    param: [x]
  kernel :
    func : cumprod_grad
  composite: cumprod_grad(x, out, out_grad, dim, exclusive, reverse, x_grad)

- backward_op : cumsum_grad
  forward : cumsum(Tensor x, Scalar axis=-1, bool flatten=false, bool exclusive=false, bool reverse=false) -> Tensor(out)
147 changes: 147 additions & 0 deletions test/prim/prim/vjp/eager/test_comp_eager_cumprod_grad.py
@@ -0,0 +1,147 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random
import unittest

import numpy as np
import parameterized as param

import paddle
from paddle.base import core


@param.parameterized_class(
    ('primal', 'dtype'),
    [
Contributor:
Please add a single-element test: np.array(np.rand(), dtype="float32")

Contributor Author:
Done

        (
            np.random.uniform(1, 5, (50,)),
            np.float32,
        ),
        (
            np.random.rand(10, 10),
            np.float32,
        ),
        (
            np.random.rand(2, 3, 4),
            np.float32,
        ),
        (
            np.random.rand(2, 3, 3, 4),
            np.float32,
        ),
        (
            np.random.rand(2, 3, 3, 4, 5),
            np.float32,
        ),
        (
            np.random.randint(1, 100, (2, 3, 4)),
            np.int64,
        ),
    ],
)
class TestCumprodGradComp(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.primal = cls.primal.astype(cls.dtype)
        cls.zero_nums = [0, 1, 10, int(np.prod(cls.primal.shape))]

    def test_cumprod_grad_comp(self):
        def actual(primal, dim):
            paddle.disable_static()
            core.set_prim_eager_enabled(True)
            x = paddle.to_tensor(primal, dtype='float32', stop_gradient=False)
            x.stop_gradient = False
            y = paddle.cumprod(x, dim=dim)
            x_cotangent = paddle.grad(
                y, x, create_graph=True, retain_graph=True
            )
            return x_cotangent[0]

        def desired(primal, dim):
            paddle.disable_static()
            core.set_prim_eager_enabled(False)
            x = paddle.to_tensor(primal, dtype='float32', stop_gradient=False)
            x.stop_gradient = False
            y = paddle.cumprod(x, dim=dim)
            x_cotangent = paddle.grad(
                y, x, create_graph=False, retain_graph=True
            )
            return x_cotangent[0]

        for zero_num in self.zero_nums:
            shape = self.primal.shape
            x = self.primal.flatten()
            indices = random.sample(range(x.size), zero_num)
            for i in indices:
                x[i] = 0
            x = np.reshape(x, shape)
            for i in range(len(self.primal.shape)):
                np.testing.assert_allclose(
                    actual=actual(x, i),
                    desired=desired(x, i),
                    rtol=1e-6,
                    atol=0,
                )
        core.set_prim_eager_enabled(False)


@param.parameterized_class(
    ('primal', 'dtype'),
    [
        (
            np.random.uniform(1, 5, ()),
            np.float32,
        ),
    ],
)
class TestCumprodGradComp0D(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.primal = cls.primal.astype(cls.dtype)

    def test_cumprod_grad_comp_0d(self):
        def actual(primal, dim):
            paddle.disable_static()
            core.set_prim_eager_enabled(True)
            x = paddle.to_tensor(primal, dtype='float32', stop_gradient=False)
            x.stop_gradient = False
            y = paddle.cumprod(x, dim=dim)
            x_cotangent = paddle.grad(
                y, x, create_graph=True, retain_graph=True
            )
            return x_cotangent[0]

        def desired(primal, dim):
            paddle.disable_static()
            core.set_prim_eager_enabled(False)
            x = paddle.to_tensor(primal, dtype='float32', stop_gradient=False)
            x.stop_gradient = False
            y = paddle.cumprod(x, dim=dim)
            x_cotangent = paddle.grad(
                y, x, create_graph=False, retain_graph=True
            )
            return x_cotangent[0]

        np.testing.assert_allclose(
            actual=actual(self.primal, 0),
            desired=desired(self.primal, 0),
            rtol=1e-6,
            atol=0,
        )
        core.set_prim_eager_enabled(False)


if __name__ == '__main__':
    unittest.main()
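The tests above deliberately overwrite input entries with zero. Plain division by x in the composite rule breaks at those entries, which is what the first_zero_mask / replace_first_one branch handles. A standalone NumPy sketch of that zero handling (same assumptions as before: 1-D input, inclusive cumprod, no reversal; one zero injected), checked against the explicit Jacobian:

import numpy as np

def rev_cumsum(v):
    # reversed cumulative sum: result[k] = sum_{i >= k} v[i]
    return np.cumsum(v[::-1])[::-1]

rng = np.random.default_rng(1)
n = 6
x = rng.uniform(1.0, 2.0, size=n)
x[2] = 0.0                          # inject a zero, as the tests do
g = rng.standard_normal(n)          # upstream gradient, i.e. out_grad
out = np.cumprod(x)

# Reference gradient from the explicit Jacobian:
# d out[i] / d x[k] = prod_{j <= i, j != k} x[j] for i >= k, else 0.
jac = np.zeros((n, n))
for i in range(n):
    for k in range(i + 1):
        jac[i, k] = np.prod(np.delete(x[: i + 1], k))
dx_reference = g @ jac

# Mirror of the composite's zero handling.
zero_mask = (x == 0).astype(x.dtype)
incl = np.cumsum(zero_mask)
excl = incl - zero_mask
first_zero_mask = ((incl + excl) == 1).astype(x.dtype)  # marks only the first zero
replace_one = (1 - zero_mask) * x + zero_mask           # all zeros -> 1, safe division
replace_first_one = (1 - first_zero_mask) * x + first_zero_mask
common_dx = rev_cumsum(out * g)
zeros_dx = rev_cumsum(np.cumprod(replace_first_one) * g)
dx_composite = (
    (1 - first_zero_mask) * common_dx + first_zero_mask * zeros_dx
) / replace_one

assert np.allclose(dx_composite, dx_reference)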
1 change: 1 addition & 0 deletions test/prim/prim/vjp/static/CMakeLists.txt
@@ -16,3 +16,4 @@ set_tests_properties(test_comp_add_grad PROPERTIES TIMEOUT 60)
set_tests_properties(test_comp_sub_grad PROPERTIES TIMEOUT 60)
set_tests_properties(test_comp_add_tanh_grad PROPERTIES TIMEOUT 60)
set_tests_properties(test_comp_sqrt_grad PROPERTIES TIMEOUT 60)
set_tests_properties(test_comp_cumprod_grad PROPERTIES TIMEOUT 150)