Skip to content

Commit

Permalink
add fp16 kernel for clip_op (#36577)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangbo9674 committed Oct 24, 2021
1 parent 1906c74 commit d95dbe2
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 10 deletions.
8 changes: 6 additions & 2 deletions paddle/fluid/operators/clip_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@ REGISTER_OP_CUDA_KERNEL(
clip, ops::ClipKernel<paddle::platform::CUDADeviceContext, float>,
ops::ClipKernel<paddle::platform::CUDADeviceContext, double>,
ops::ClipKernel<paddle::platform::CUDADeviceContext, int>,
ops::ClipKernel<paddle::platform::CUDADeviceContext, int64_t>);
ops::ClipKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ClipKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);

// Register the backward (gradient) clip CUDA kernels for the same dtype
// set as the forward op, including the newly added fp16 instantiation.
// NOTE: the diff rendering had retained both the old `int64_t>);` line and
// the new `int64_t>,` line, which is not valid C++; only the new one stays.
REGISTER_OP_CUDA_KERNEL(
    clip_grad, ops::ClipGradKernel<paddle::platform::CUDADeviceContext, float>,
    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, double>,
    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int>,
    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
    ops::ClipGradKernel<paddle::platform::CUDADeviceContext,
                        paddle::platform::float16>);
6 changes: 3 additions & 3 deletions paddle/fluid/operators/clip_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class ClipGradFunctor {
public:
explicit ClipGradFunctor(const T min, const T max) : min_(min), max_(max) {}
HOSTDEVICE T operator()(const T& x, const T& y) const {
return (y > min_ && y < max_) ? x : 0;
return (y > min_ && y < max_) ? x : static_cast<T>(0);
}

private:
Expand All @@ -79,7 +79,7 @@ class ClipKernel : public framework::OpKernel<T> {
}
max = static_cast<T>(max);

auto min = context.Attr<float>("min");
auto min = static_cast<T>(context.Attr<float>("min"));
Tensor min_cpu;
if (context.HasInput("Min")) {
auto* min_t = context.Input<Tensor>("Min");
Expand Down Expand Up @@ -156,7 +156,7 @@ class ClipGradKernel : public framework::OpKernel<T> {
}
max = static_cast<T>(max);

auto min = context.Attr<float>("min");
auto min = static_cast<T>(context.Attr<float>("min"));
Tensor min_cpu;
if (context.HasInput("Min")) {
auto* min_t = context.Input<Tensor>("Min");
Expand Down
26 changes: 21 additions & 5 deletions python/paddle/fluid/tests/unittests/test_clip_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def setUp(self):
else:
max_v = self.attrs['max']

input = np.random.random(self.shape).astype("float32")
input = np.random.random(self.shape).astype(self.dtype)
input[np.abs(input - min_v) < self.max_relative_error] = 0.5
input[np.abs(input - max_v) < self.max_relative_error] = 0.5
self.inputs['X'] = input
Expand All @@ -60,50 +60,66 @@ def test_check_grad_normal(self):
paddle.disable_static()

def initTestCase(self):
    # Base configuration: float32 clip over a (4, 10, 10) tensor.
    # The 'Min'/'Max' tensor inputs take precedence over the min/max
    # attrs, so the effective clip range here is [0.1, 0.8].
    # (The diff rendering had retained superseded astype('float32')
    # assignments immediately overwritten below; they are removed.)
    self.dtype = np.float32
    self.shape = (4, 10, 10)
    self.max = 0.8
    self.min = 0.3
    self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
    self.inputs['Min'] = np.array([0.1]).astype(self.dtype)


class TestCase1(TestClipOp):
    """Clip over an (8, 16, 8) tensor with attr range [0.0, 0.7]."""

    def initTestCase(self):
        # min/max come from op attrs only; no 'Min'/'Max' tensor inputs.
        self.dtype = np.float32
        self.min = 0.0
        self.max = 0.7
        self.shape = (8, 16, 8)


class TestCase2(TestClipOp):
    """Clip over an (8, 16) tensor with attr range [0.0, 1.0]."""

    def initTestCase(self):
        # min/max come from op attrs only; no 'Min'/'Max' tensor inputs.
        self.dtype = np.float32
        self.min = 0.0
        self.max = 1.0
        self.shape = (8, 16)


class TestCase3(TestClipOp):
    """Clip over a (4, 8, 16) tensor with attr range [0.2, 0.7]."""

    def initTestCase(self):
        # min/max come from op attrs only; no 'Min'/'Max' tensor inputs.
        self.dtype = np.float32
        self.min = 0.2
        self.max = 0.7
        self.shape = (4, 8, 16)


class TestCase4(TestClipOp):
    """Clip with both attrs and 'Min'/'Max' tensor inputs (tensors win)."""

    def initTestCase(self):
        # Effective clip range comes from the 'Min'/'Max' tensor inputs
        # ([0.3, 0.8]), not from the min/max attrs.
        # (The diff rendering had retained superseded astype('float32')
        # assignments immediately overwritten below; they are removed.)
        self.dtype = np.float32
        self.shape = (4, 8, 8)
        self.max = 0.7
        self.min = 0.2
        self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
        self.inputs['Min'] = np.array([0.3]).astype(self.dtype)


class TestCase5(TestClipOp):
    """Degenerate range: min == max == 0.5 clips every element to 0.5."""

    def initTestCase(self):
        # min/max come from op attrs only; no 'Min'/'Max' tensor inputs.
        self.dtype = np.float32
        self.min = 0.5
        self.max = 0.5
        self.shape = (4, 8, 16)


class TestCase6(TestClipOp):
    """fp16 variant: exercises the newly registered float16 CUDA kernels."""

    def initTestCase(self):
        # BUG FIX: the original line read `self.dtype == np.float16` —
        # a no-op comparison, not an assignment — so dtype was never set
        # and the fp16 path was not actually exercised by this case.
        self.dtype = np.float16
        self.shape = (4, 8, 8)
        self.max = 0.7
        self.min = 0.2
        # Tensor inputs override the attrs: effective range is [0.3, 0.8].
        self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
        self.inputs['Min'] = np.array([0.3]).astype(self.dtype)


class TestClipOpError(unittest.TestCase):
def test_errors(self):
paddle.enable_static()
Expand Down

1 comment on commit d95dbe2

@paddle-bot-old
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Congratulations! Your pull request passed all required CI checks. You can now ask the reviewer(s) to approve and merge. 🎉

Please sign in to comment.