【Hackathon 6th No.25】Align and enhance the functionality of paddle.histogram / paddle.nn.functional.threshold #63044

Closed · wants to merge 3 commits
4 changes: 2 additions & 2 deletions paddle/phi/api/yaml/backward.yaml
@@ -2504,8 +2504,8 @@
func : tensor_unfold_grad

- backward_op : thresholded_relu_grad
forward : thresholded_relu (Tensor x, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float threshold)
forward : thresholded_relu (Tensor x, float threshold, float value) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float threshold, float value)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
2 changes: 1 addition & 1 deletion paddle/phi/api/yaml/op_compat.yaml
@@ -1745,7 +1745,7 @@

- op : histogram
inputs :
input : X
{input: X, weight: Weight}
Reviewer (Contributor): Please split this into two PRs: one modifying threshold and one modifying histogram.

Reviewer (Contributor): Should weight be a required parameter?

outputs :
out : Out

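Read together with the ops.yaml signature below, the user-facing call this PR appears to target would look roughly like the following Python sketch. The keyword names (weight, density) are taken from the yaml args; the exact Python signature and defaults are assumptions, not confirmed by this diff:

```python
import paddle

x = paddle.to_tensor([1.0, 2.0, 1.0, 4.0])
w = paddle.to_tensor([0.5, 1.0, 1.5, 1.0])

# Unweighted counts: output dtype stays int64.
counts = paddle.histogram(x, bins=4, min=0, max=4)

# Weighted: each element contributes its weight instead of 1,
# so the output dtype becomes float32.
weighted = paddle.histogram(x, weight=w, bins=4, min=0, max=4)

# density=True normalizes so the area under the histogram is 1.
dens = paddle.histogram(x, bins=4, min=0, max=4, density=True)
```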
5 changes: 3 additions & 2 deletions paddle/phi/api/yaml/ops.yaml
@@ -1336,8 +1336,9 @@
backward : heaviside_grad

- op : histogram
args : (Tensor input, int64_t bins = 100, int min = 0, int max = 0)
args : (Tensor input, Tensor weight, bool density = false, int64_t bins = 100, int min = 0, int max = 0)
Reviewer (Contributor): Why insert this parameter near the front of the argument list? That is highly incompatible; the guiding principle when upgrading an API is to avoid affecting existing behavior wherever possible. (A positional-call sketch of the breakage appears after this file's diff.)

output : Tensor(out)
optional: weight
infer_meta :
func : HistogramInferMeta
kernel :
@@ -2837,7 +2838,7 @@
no_need_buffer : input

- op : thresholded_relu
args : (Tensor x, float threshold = 1.0)
args : (Tensor x, float threshold = 1.0, float value = 0.0)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
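The compatibility concern raised above can be made concrete with a positional call. A minimal sketch, assuming the yaml argument order flows straight through to the Python signature (which this diff does not show):

```python
import paddle

x = paddle.to_tensor([1.0, 2.0, 3.0])

# Pre-PR signature: histogram(input, bins=100, min=0, max=0).
# Existing positional calls therefore look like this:
old_style = paddle.histogram(x, 4, 0, 4)

# With weight/density inserted before bins, the same positional call
# would bind 4 to weight and 0 to density instead -- silently changing
# meaning or raising a type error. Keyword arguments avoid the break:
new_style = paddle.histogram(x, bins=4, min=0, max=4)
```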
28 changes: 25 additions & 3 deletions paddle/phi/infermeta/unary.cc
@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/phi/common/type_traits.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/core/meta_tensor.h"
#include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/kernels/funcs/parse_qr_mode.h"
#include "paddle/phi/kernels/funcs/pooling.h"
@@ -1916,8 +1917,13 @@ void GumbelSoftmaxInferMeta(const MetaTensor& x,
UnchangedInferMetaCheckAxis(x, axis, out);
}

void HistogramInferMeta(
const MetaTensor& input, int64_t bins, int min, int max, MetaTensor* out) {
void HistogramInferMeta(const MetaTensor& input,
const MetaTensor& weight,
bool density,
int64_t bins,
int min,
int max,
MetaTensor* out) {
PADDLE_ENFORCE_GE(bins,
1,
phi::errors::InvalidArgument(
@@ -1932,9 +1938,25 @@ void HistogramInferMeta(
max,
min));

if (weight) {
auto weight_dims = weight.dims();
PADDLE_ENFORCE_EQ(
weight_dims,
input.dims(),
phi::errors::InvalidArgument(
"The shape of weight should be equal to the shape of input."
"But received weight shape is [%s], input shape is [%s]",
weight_dims,
input.dims()));
}

out->set_dims({bins});
out->share_lod(input);
out->set_dtype(DataType::INT64);
if (density || weight) {
out->set_dtype(DataType::FLOAT32);
} else {
out->set_dtype(DataType::INT64);
}
}

void IdentityLossInferMeta(const MetaTensor& x,
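The dtype rule added to HistogramInferMeta is small enough to restate. A minimal Python sketch of the same decision (hypothetical helper, for illustration only):

```python
# Hypothetical helper mirroring the C++ rule above, for illustration only.
def histogram_out_dtype(has_weight: bool, density: bool) -> str:
    # Weighted or normalized histograms hold fractional values -> float32;
    # plain counts stay int64.
    return "float32" if density or has_weight else "int64"

assert histogram_out_dtype(False, False) == "int64"
assert histogram_out_dtype(True, False) == "float32"
assert histogram_out_dtype(False, True) == "float32"
```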
9 changes: 7 additions & 2 deletions paddle/phi/infermeta/unary.h
@@ -307,8 +307,13 @@ void GumbelSoftmaxInferMeta(const MetaTensor& x,
bool hard,
int axis,
MetaTensor* out);
void HistogramInferMeta(
const MetaTensor& input, int64_t bins, int min, int max, MetaTensor* out);
void HistogramInferMeta(const MetaTensor& input,
const MetaTensor& weight,
bool density,
int64_t bins,
int min,
int max,
MetaTensor* out);

void IdentityLossInferMeta(const MetaTensor& x, int reduction, MetaTensor* out);

2 changes: 1 addition & 1 deletion paddle/phi/kernels/activation_grad_kernel.h
@@ -307,7 +307,6 @@ DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Floor);
DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Ceil);

DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu, alpha);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu, threshold);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink, lambda);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink, threshold);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Logit, eps);
@@ -317,6 +316,7 @@ DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Celu, alpha);
DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(HardTanh, t_min, t_max);
DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(STanh, scale_a, scale_b);
DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus, beta, threshold);
DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(ThresholdedRelu, threshold, value);
DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid, slope, offset);

} // namespace phi
2 changes: 1 addition & 1 deletion paddle/phi/kernels/activation_kernel.h
@@ -74,7 +74,6 @@ DECLARE_ACTIVATION_KERNEL(Ceil)
DECLARE_ACTIVATION_KERNEL(Negative)

DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(HardShrink, threshold)
@@ -87,6 +86,7 @@ DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(HardTanh, t_min, t_max)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(STanh, scale_a, scale_b)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(Softplus, beta, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(HardSigmoid, slope, offset)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(ThresholdedRelu, threshold, value)

template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx,
7 changes: 4 additions & 3 deletions paddle/phi/kernels/cpu/activation_grad_kernel.cc
@@ -155,9 +155,6 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_NODEP(Ceil, ZeroGradFunctor);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
LeakyReluGradFunctor,
alpha);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu,
ThresholdedReluGradFunctor,
threshold);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink,
SoftShrinkGradFunctor,
lambda);
@@ -184,6 +181,10 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus,
SoftplusGradFunctor,
beta,
threshold);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(ThresholdedRelu,
ThresholdedReluGradFunctor,
threshold,
value);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid,
HardSigmoidGradFunctor,
slope,
7 changes: 4 additions & 3 deletions paddle/phi/kernels/cpu/activation_kernel.cc
@@ -106,9 +106,6 @@ DEFINE_CPU_ACTIVATION_KERNEL_WITH_INT_IN_FLOAT_OUT(Exp, ExpFunctor)
DEFINE_CPU_ACTIVATION_KERNEL_WITH_INT_IN_FLOAT_OUT(Expm1, Expm1Functor)

DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
ThresholdedReluFunctor,
threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda)
@@ -122,6 +119,10 @@ DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
HardSigmoidFunctor,
slope,
offset)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(ThresholdedRelu,
ThresholdedReluFunctor,
threshold,
value)

template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx,
56 changes: 43 additions & 13 deletions paddle/phi/kernels/cpu/histogram_kernel.cc
@@ -13,16 +13,24 @@
// limitations under the License.

#include "paddle/phi/kernels/histogram_kernel.h"
#include <cstdint>

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/device_ext.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/reduce_sum_kernel.h"
#include "paddle/utils/optional.h"

namespace phi {

template <typename T, typename Context>
void HistogramKernel(const Context& dev_ctx,
const DenseTensor& input,
const paddle::optional<DenseTensor>& weight,
bool density,
int64_t bins,
int min,
int max,
@@ -32,12 +40,9 @@ void HistogramKernel(const Context& dev_ctx,
auto& maxval = max;

const T* input_data = input.data<T>();
auto weight_data = weight.get_ptr() ? weight.get_ptr()->data<T>() : nullptr;
auto input_numel = input.numel();

int64_t* out_data = dev_ctx.template Alloc<int64_t>(output);
phi::funcs::SetConstant<Context, int64_t>()(
dev_ctx, output, static_cast<int64_t>(0));

if (input_data == nullptr) return;

T output_min = static_cast<T>(minval);
@@ -67,11 +72,38 @@
maxval,
minval));

for (int64_t i = 0; i < input_numel; i++) {
if (input_data[i] >= output_min && input_data[i] <= output_max) {
const int64_t bin = (int64_t)((input_data[i] - output_min) * nbins /
(output_max - output_min));
out_data[std::min(bin, nbins - 1)] += 1;
if (density || weight_data) {
float* out_data = dev_ctx.template Alloc<float>(output);
phi::funcs::SetConstant<Context, float>()(
dev_ctx, output, static_cast<float>(0));
for (int64_t i = 0; i < input_numel; i++) {
if (input_data[i] >= output_min && input_data[i] <= output_max) {
const int64_t bin = (int64_t)((input_data[i] - output_min) * nbins /
(output_max - output_min));
out_data[std::min(bin, nbins - 1)] +=
weight_data ? static_cast<float>(weight_data[i]) : 1;
}
}
if (density) {
DenseTensor sum = phi::Sum<float, Context>(
dev_ctx, *output, phi::IntArray({0}), phi::DataType::FLOAT32, false);
float* sum_data = sum.data<float>();
float gap = static_cast<float>(nbins) /
static_cast<float>((output_max - output_min)) / *sum_data;
for (int64_t i = 0; i < nbins; i++) {
out_data[i] *= gap;
}
}
} else {
int64_t* out_data = dev_ctx.template Alloc<int64_t>(output);
phi::funcs::SetConstant<Context, int64_t>()(
dev_ctx, output, static_cast<int64_t>(0));
for (int64_t i = 0; i < input_numel; i++) {
if (input_data[i] >= output_min && input_data[i] <= output_max) {
const int64_t bin = (int64_t)((input_data[i] - output_min) * nbins /
(output_max - output_min));
out_data[std::min(bin, nbins - 1)] += 1;
}
}
}
}
@@ -85,6 +117,4 @@ PD_REGISTER_KERNEL(histogram,
float,
double,
int,
int64_t) {
kernel->OutputAt(0).SetDataType(paddle::DataType::INT64);
}
int64_t) {}
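For intuition, the CPU kernel's weighted and density branches match this NumPy reference. This is a sketch: the fallback to the data's min/max when min == max == 0 is assumed from the elided part of the kernel, and edge cases (empty range, zero total weight) are not handled:

```python
import numpy as np

def histogram_ref(x, weight=None, density=False, bins=100, lo=0.0, hi=0.0):
    # Assumed fallback: when both range endpoints are 0, use the data range.
    if lo == 0.0 and hi == 0.0:
        lo, hi = float(x.min()), float(x.max())
    w = weight if weight is not None else np.ones_like(x, dtype=np.float64)
    hist = np.zeros(bins, dtype=np.float64)
    for xi, wi in zip(x, w):
        if lo <= xi <= hi:
            b = int((xi - lo) * bins / (hi - lo))
            hist[min(b, bins - 1)] += wi  # clamp top edge into last bin
    if density:
        # density[i] = count[i] / (total * bin_width)
        #            = count[i] * bins / ((hi - lo) * total),
        # i.e. the kernel's "gap" factor.
        hist *= bins / ((hi - lo) * hist.sum())
    return hist
```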
19 changes: 12 additions & 7 deletions paddle/phi/kernels/funcs/activation_functor.h
@@ -1825,22 +1825,25 @@ struct LeakyReluGradGradFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ThresholdedReluFunctor : public BaseActivationFunctor<T> {
float threshold;
float value;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
return {{"threshold", &threshold}, {"value", &value}};
}

template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
auto th = static_cast<T>(threshold); // NOLINT
out.device(d) = (x > th).template cast<T>() * x;
out.device(d) = (x > th).template cast<T>() * x +
(x <= th).template cast<T>() * static_cast<T>(value);
}
};

template <typename T>
struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
float threshold;
float value;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
return {{"threshold", &threshold}, {"value", &value}};
}

template <typename Device,
@@ -4100,24 +4103,26 @@ template <typename T>
struct CudaThresholdedReluFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
float value;

typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
return {{"threshold", &threshold}, {"value", &value}};
}

// thresholded_relu(x) = x > threshold ? x : 0
// thresholded_relu(x, threshold, value) = x > threshold ? x : value
__device__ __forceinline__ T operator()(const T x) const {
return x > static_cast<T>(threshold) ? x : zero;
return x > static_cast<T>(threshold) ? x : static_cast<T>(value);
}
};

template <typename T>
struct CudaThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
float value;

typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
return {{"threshold", &threshold}, {"value", &value}};
}

// dx = x > threshold ? dout : 0
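A compact NumPy reference for the updated functor pair; a sketch of the math, not Paddle's API. Note that `value` affects only the forward output, while the gradient mask is unchanged, matching the grad functors above:

```python
import numpy as np

def thresholded_relu(x, threshold=1.0, value=0.0):
    # Forward: pass x through above the threshold, emit `value` elsewhere.
    return np.where(x > threshold, x, value)

def thresholded_relu_grad(x, dout, threshold=1.0, value=0.0):
    # `value` is a constant, so the gradient mask is unchanged:
    # dout where x > threshold, zero elsewhere.
    return np.where(x > threshold, dout, 0.0)

x = np.array([-1.0, 0.5, 2.0])
print(thresholded_relu(x))             # [0.  0.  2. ]
print(thresholded_relu(x, value=0.5))  # [0.5 0.5 2. ]
```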
7 changes: 4 additions & 3 deletions paddle/phi/kernels/gpu/activation_grad_kernel.cu
@@ -209,9 +209,6 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Swish, CudaSwishGradFunctor);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
CudaLeakyReluGradFunctor,
alpha);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu,
CudaThresholdedReluGradFunctor,
threshold);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink,
CudaSoftShrinkGradFunctor,
lambda);
@@ -243,6 +240,10 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(Softplus,
CudaSoftplusGradFunctor,
beta,
threshold);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(ThresholdedRelu,
CudaThresholdedReluGradFunctor,
threshold,
value);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPOUT(HardSigmoid,
CudaHardSigmoidGradFunctor,
slope,
7 changes: 4 additions & 3 deletions paddle/phi/kernels/gpu/activation_kernel.cu
@@ -123,9 +123,6 @@ DEFINE_GPU_ACTIVATION_KERNEL_WITH_INT_IN_FLOAT_OUT(Expm1, CudaExpm1Functor)

DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, CudaLeakyReluFunctor, alpha)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LogitCUDA, CudaLogitFunctor, eps)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
CudaThresholdedReluFunctor,
threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink,
CudaHardShrinkFunctor,
threshold)
@@ -148,6 +145,10 @@ DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
slope,
offset)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Selu, CudaSeluFunctor, scale, alpha)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(ThresholdedRelu,
CudaThresholdedReluFunctor,
threshold,
value)

template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx,