【PaddlePaddle Hackathon 4 No.49】: Add float16 data type support for Paddle bce_loss #51401

Closed
wants to merge 11 commits
14 changes: 14 additions & 0 deletions paddle/phi/kernels/gpu/bce_loss_grad_kernel.cu
@@ -21,6 +21,8 @@
#include "paddle/phi/core/hostdevice.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"

namespace phi {

@@ -37,6 +39,11 @@ struct BCELossGradFunctor {
HOSTDEVICE inline T operator()(const T x, const T label, const T dout) const {
T term1 = max((one - x) * x, eps);
return (dout * (x - label) / term1);
using MT = typename phi::dtype::MPTypeTrait<T>::Type;
MT x_mt = static_cast<MT>(x);
MT term1 = max((static_cast<MT>(one) - x_mt) * x_mt, static_cast<MT>(eps));
return static_cast<T>(static_cast<MT>(dout) *
(x_mt - static_cast<MT>(label)) / term1);
}
};

@@ -57,3 +64,10 @@ void BCELossGradKernel(const Context& dev_ctx,

PD_REGISTER_KERNEL(
bce_loss_grad, GPU, ALL_LAYOUT, phi::BCELossGradKernel, float, double) {}
PD_REGISTER_KERNEL(bce_loss_grad,
GPU,
ALL_LAYOUT,
phi::BCELossGradKernel,
float,
double,
phi::dtype::float16) {}
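
Note on the cast pattern above: phi::dtype::MPTypeTrait maps float16 to float, so the grad functor performs the subtraction, multiplication, and division in float32 and only casts the final result back to float16. This matters because eps (a small constant held by the functor; its exact value is not shown in this excerpt) would underflow to zero in float16 and make the division unstable. A rough NumPy sketch of the intended numerics, with the eps value assumed for illustration:

import numpy as np

def bce_loss_grad_ref(x, label, dout, eps=1e-12):
    # Promote fp16 inputs to fp32 before the arithmetic, mirroring the
    # MPTypeTrait cast in the CUDA functor, then cast the result back.
    x32 = x.astype(np.float32)
    label32 = label.astype(np.float32)
    dout32 = dout.astype(np.float32)
    term1 = np.maximum((1.0 - x32) * x32, eps)
    return (dout32 * (x32 - label32) / term1).astype(x.dtype)
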
18 changes: 18 additions & 0 deletions paddle/phi/kernels/gpu/bce_loss_kernel.cu
@@ -22,6 +22,8 @@
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
#include "paddle/phi/kernels/primitive/functor_primitives.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"

namespace phi {

@@ -43,6 +45,15 @@ struct BCELossFunctor {
T term1 = max(phi::kps::details::Log(x), neg_100);
T term2 = max(phi::kps::details::Log(one - x), neg_100);
return (((label - one) * term2) - (label * term1));
using MT = typename phi::dtype::MPTypeTrait<T>::Type;
MT term1 = max(phi::kps::details::Log(static_cast<MT>(x)),
static_cast<MT>(neg_100));
MT term2 =
max(phi::kps::details::Log(static_cast<MT>(one) - static_cast<MT>(x)),
static_cast<MT>(neg_100));
return static_cast<T>(
((static_cast<MT>(label) - static_cast<MT>(one)) * term2) -
(static_cast<MT>(label) * term1));
}
};

@@ -62,3 +73,10 @@ void BCELossKernel(const Context& dev_ctx,

PD_REGISTER_KERNEL(
bce_loss, GPU, ALL_LAYOUT, phi::BCELossKernel, float, double) {}
PD_REGISTER_KERNEL(bce_loss,
GPU,
ALL_LAYOUT,
phi::BCELossKernel,
float,
double,
phi::dtype::float16) {}
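
The forward functor follows the same promote-then-cast-back pattern: both log terms are evaluated in float32 and clamped at -100, as the existing float/double path already does, and only the final loss value is cast back to float16. A hedged NumPy reference of that formula:

import numpy as np

def bce_loss_ref(x, label):
    # Compute in fp32, clamp the log terms at -100, cast back to the input dtype.
    x32 = x.astype(np.float32)
    label32 = label.astype(np.float32)
    term1 = np.maximum(np.log(x32), -100.0)
    term2 = np.maximum(np.log(1.0 - x32), -100.0)
    return (((label32 - 1.0) * term2) - (label32 * term1)).astype(x.dtype)
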
9 changes: 9 additions & 0 deletions paddle/phi/kernels/gpu/maxout_grad_kernel.cu
@@ -15,5 +15,14 @@
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/maxout_grad_kernel_impl.h"


PD_REGISTER_KERNEL(
maxout_grad, GPU, ALL_LAYOUT, phi::MaxOutGradKernel, float, double) {}

PD_REGISTER_KERNEL(maxout_grad,
GPU,
ALL_LAYOUT,
phi::MaxOutGradKernel,
float,
double,
phi::dtype::float16) {}
9 changes: 9 additions & 0 deletions paddle/phi/kernels/gpu/maxout_kernel.cu
@@ -15,4 +15,13 @@
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/maxout_kernel_impl.h"


PD_REGISTER_KERNEL(maxout, GPU, ALL_LAYOUT, phi::MaxOutKernel, float, double) {}

PD_REGISTER_KERNEL(maxout,
GPU,
ALL_LAYOUT,
phi::MaxOutKernel,
float,
double,
phi::dtype::float16) {}
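
With the two registrations above, the GPU maxout forward and backward kernels accept float16 tensors. A minimal usage sketch, assuming a CUDA build of Paddle that includes this patch and a device with float16 support:

import paddle

paddle.set_device("gpu")
# 6 input channels with groups=2 -> 3 output channels along axis 1.
x = paddle.rand([2, 6, 5, 4]).astype("float16")
maxout = paddle.nn.Maxout(groups=2, axis=1)
y = maxout(x)  # expected shape [2, 3, 5, 4], dtype float16
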
6 changes: 6 additions & 0 deletions paddle/phi/kernels/impl/maxout_grad_kernel_impl.h
@@ -17,6 +17,7 @@
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/maxouting.h"
#include "paddle/phi/kernels/maxout_grad_kernel.h"
#include "paddle/phi/common/amp_type_traits.h"

namespace phi {

@@ -33,10 +34,15 @@ void MaxOutGradKernel(const Context& dev_ctx,
}

phi::funcs::SetConstant<Context, T> zero;
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
phi::funcs::SetConstant<Context, MPType> zero;
if (x_grad) {
dev_ctx.template Alloc<T>(x_grad);
zero(dev_ctx, x_grad, static_cast<T>(0.0));
phi::funcs::MaxOutGradFunctor<Context, T> maxout_backward;
dev_ctx.template Alloc<MPType>(x_grad);
zero(dev_ctx, x_grad, static_cast<MPType>(0.0));
phi::funcs::MaxOutGradFunctor<Context, MPType> maxout_backward;
maxout_backward(dev_ctx, x, x_grad, out, out_grad, groups, axis);
}
}
3 changes: 3 additions & 0 deletions paddle/phi/kernels/impl/maxout_kernel_impl.h
@@ -16,6 +16,7 @@

#include "paddle/phi/kernels/funcs/maxouting.h"
#include "paddle/phi/kernels/maxout_kernel.h"
#include "paddle/phi/common/amp_type_traits.h"

namespace phi {

@@ -30,6 +31,8 @@ void MaxOutKernel(const Context& dev_ctx,
}

phi::funcs::MaxOutFunctor<Context, T> maxout_forward;
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
phi::funcs::MaxOutFunctor<Context, MPType> maxout_forward;
maxout_forward(dev_ctx, x, out, groups, axis);
}

2 changes: 2 additions & 0 deletions paddle/phi/kernels/maxout_grad_kernel.h
@@ -14,6 +14,8 @@

#pragma once

#include "paddle/phi/core/device_context.h"

#include "paddle/phi/core/dense_tensor.h"

namespace phi {
2 changes: 2 additions & 0 deletions paddle/phi/kernels/maxout_kernel.h
@@ -14,6 +14,8 @@

#pragma once

#include "paddle/phi/core/device_context.h"

#include "paddle/phi/core/dense_tensor.h"

namespace phi {
43 changes: 43 additions & 0 deletions python/paddle/fluid/tests/unittests/test_bce_loss.py
@@ -19,6 +19,7 @@

import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core


def test_static_layer(
@@ -278,6 +279,48 @@ class TestBceLossOpCase2(OpTest):
def init_test_cast(self):
self.shape = [2, 3, 20]

@unittest.skipIf(
not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
)
class TestBceLossOpFP16(OpTest):
def setUp(self):
self.init_test_case()
self.op_type = "bce_loss"
self.python_api = bce_wrapper
input_np = np.random.uniform(0.1, 0.8, self.shape).astype("float16")
label_np = np.random.randint(0, 2, self.shape).astype("float16")
output_np = bce_loss(input_np, label_np)

self.inputs = {'X': input_np, 'Label': label_np}
self.outputs = {'Out': output_np}

def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-3)

def test_check_grad(self):
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_grad_with_place(
place, ['X'], 'Out', max_relative_error=0.5
)

def init_test_case(self):
self.shape = [10, 10]


class TestBceLossOpFP16Case1(OpTest):
def init_test_cast(self):
self.shape = [20, 30, 40, 50]


class TestBceLossOpFP16Case2(OpTest):
def init_test_cast(self):
self.shape = [2, 3, 20]



if __name__ == "__main__":
paddle.enable_static()
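
The FP16 test cases above mirror what a user-level float16 call would exercise. A minimal usage sketch, assuming a CUDA device where float16 is supported:

import paddle
import paddle.nn.functional as F

paddle.set_device("gpu")
x = paddle.uniform([10, 10], min=0.1, max=0.8).astype("float16")
label = paddle.randint(0, 2, [10, 10]).astype("float16")
loss = F.binary_cross_entropy(x, label, reduction="none")  # float16 loss tensor
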
28 changes: 28 additions & 0 deletions python/paddle/fluid/tests/unittests/test_maxout_op.py
@@ -135,6 +135,34 @@ def test_errors(self):
x_float32 = paddle.fluid.data(name='x_float32', shape=[2, 4, 6, 8])
self.assertRaises(ValueError, F.maxout, x_float32, 2, 2)

@unittest.skipIf(
not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
)
class TestMaxOutOpFP16(OpTest):
def setUp(self):
self.op_type = "maxout"
self.python_api = paddle.nn.Maxout
input_np = np.random.uniform(-1, 1, [2, 6, 5, 4]).astype(np.float16)
self.groups = 2
self.axis = 1
output_np = maxout_forward_naive(input_np, self.groups, self.axis)
self.attrs = {'groups': self.groups, 'axis': self.axis}
self.inputs = {'X': input_np}
self.outputs = {'Out': output_np}

def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-3)

def test_check_grad(self):
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_grad_with_place(
place, ['X'], 'Out', max_relative_error=0.5
)


if __name__ == '__main__':
unittest.main()
1 change: 1 addition & 0 deletions python/paddle/nn/layer/loss.py
@@ -69,6 +69,7 @@ class BCEWithLogitsLoss(Layer):
weight (Tensor, optional): A manual rescaling weight given to the loss of each
batch element. If given, it has to be a 1D Tensor whose size is `[N, ]`,
The data type is float32, float64. Default is ``'None'``.
The data type is float16, float32, float64. Default is ``'None'``.
reduction (str, optional): Indicate how to average the loss by batch_size,
the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
If :attr:`reduction` is ``'none'``, the unreduced loss is returned;