[MLU] add_fluid_mluop_yolo_box (#46573)

PaddlePaddle · Sep 30, 2022 · 832b0a1 · 832b0a1
1 parent d16360c
commit 832b0a1
Show file tree

Hide file tree

Showing 5 changed files with 476 additions and 1 deletion.
diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt
@@ -47,6 +47,7 @@ elseif(WITH_MLU)
   detection_library(iou_similarity_op SRCS iou_similarity_op.cc
                     iou_similarity_op_mlu.cc)
   detection_library(prior_box_op SRCS prior_box_op.cc)
+  detection_library(yolo_box_op SRCS yolo_box_op.cc yolo_box_op_mlu.cc)
 elseif(WITH_ASCEND_CL)
   detection_library(iou_similarity_op SRCS iou_similarity_op.cc
                     iou_similarity_op_npu.cc)
@@ -55,6 +56,7 @@ else()
   detection_library(iou_similarity_op SRCS iou_similarity_op.cc
                     iou_similarity_op.cu)
   detection_library(prior_box_op SRCS prior_box_op.cc)
+  detection_library(yolo_box_op SRCS yolo_box_op.cc)
   # detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc)
 endif()
 
@@ -73,7 +75,6 @@ detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS gpc)
 detection_library(matrix_nms_op SRCS matrix_nms_op.cc DEPS gpc)
 detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu)
 detection_library(yolov3_loss_op SRCS yolov3_loss_op.cc)
-detection_library(yolo_box_op SRCS yolo_box_op.cc)
 detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc
                   box_decoder_and_assign_op.cu)
 detection_library(sigmoid_focal_loss_op SRCS sigmoid_focal_loss_op.cc

diff --git a/paddle/fluid/operators/detection/yolo_box_op_mlu.cc b/paddle/fluid/operators/detection/yolo_box_op_mlu.cc
@@ -0,0 +1,137 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/mlu/mlu_baseop.h"
+
+namespace paddle {
+namespace operators {
+// MLU kernel for the yolo_box op. Reads the inputs "X" and "ImgSize", runs
+// mluOpYoloBox into scratch tensors laid out as [N, S, C, H*W], and then
+// transposes the scratch results into the "Boxes" and "Scores" outputs.
+template <typename T>
+class YoloBoxMLUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    // ---- operator inputs and outputs ----
+    const auto* input = ctx.Input<phi::DenseTensor>("X");
+    const auto* image_size = ctx.Input<phi::DenseTensor>("ImgSize");
+    auto* boxes_out = ctx.Output<phi::DenseTensor>("Boxes");
+    auto* scores_out = ctx.Output<phi::DenseTensor>("Scores");
+
+    // ---- operator attributes ----
+    const std::vector<int> anchor_values =
+        ctx.Attr<std::vector<int>>("anchors");
+    const int class_num = ctx.Attr<int>("class_num");
+    const float conf_thresh = ctx.Attr<float>("conf_thresh");
+    const int downsample_ratio = ctx.Attr<int>("downsample_ratio");
+    const bool clip_bbox = ctx.Attr<bool>("clip_bbox");
+    const float scale_x_y = ctx.Attr<float>("scale_x_y");
+    const bool iou_aware = ctx.Attr<bool>("iou_aware");
+    const float iou_aware_factor = ctx.Attr<float>("iou_aware_factor");
+
+    // ---- derived sizes ----
+    // The anchors attribute contributes two entries per anchor.
+    const int64_t anchor_len = anchor_values.size();
+    const int anchor_num = anchor_values.size() / 2;
+    // Only dims 0, 2 and 3 of X are read here.
+    const auto& x_dims = input->dims();
+    const int batch = x_dims[0];
+    const int height = x_dims[2];
+    const int width = x_dims[3];
+    const int hw = height * width;
+
+    // Layout produced by mluOpYoloBox:
+    //   boxes  -> [N, anchor_num, 4,         H*W]
+    //   scores -> [N, anchor_num, class_num, H*W]
+    std::vector<int64_t> mluop_boxes_shape({batch, anchor_num, 4, hw});
+    std::vector<int64_t> mluop_scores_shape(
+        {batch, anchor_num, class_num, hw});
+
+    // Layout written to the operator outputs (last two axes swapped). The
+    // framework-level outputs are documented as 3-D [N, M, 4] and
+    // [N, M, class_num]; presumably M == anchor_num * H * W, so these 4-D
+    // shapes describe the same buffers -- TODO confirm against InferShape.
+    std::vector<int64_t> paddle_boxes_shape({batch, anchor_num, hw, 4});
+    std::vector<int64_t> paddle_scores_shape(
+        {batch, anchor_num, hw, class_num});
+
+    // Scratch tensors that receive the raw mluOpYoloBox results.
+    auto& dev_ctx = ctx.template device_context<MLUDeviceContext>();
+    phi::DenseTensor boxes_scratch =
+        ctx.AllocateTmpTensor<T, MLUDeviceContext>({batch, anchor_num, 4, hw},
+                                                   dev_ctx);
+    phi::DenseTensor scores_scratch =
+        ctx.AllocateTmpTensor<T, MLUDeviceContext>(
+            {batch, anchor_num, class_num, hw}, dev_ctx);
+
+    // The scratch layout is described twice: once for the mluOp call and once
+    // for the CNNL transpose that consumes the same buffers.
+    MLUOpTensorDesc boxes_scratch_mluop_desc(
+        4, mluop_boxes_shape.data(), ToMluOpDataType<T>());
+    MLUOpTensorDesc scores_scratch_mluop_desc(
+        4, mluop_scores_shape.data(), ToMluOpDataType<T>());
+    MLUCnnlTensorDesc boxes_scratch_cnnl_desc(
+        4, mluop_boxes_shape.data(), ToCnnlDataType<T>());
+    MLUCnnlTensorDesc scores_scratch_cnnl_desc(
+        4, mluop_scores_shape.data(), ToCnnlDataType<T>());
+
+    // Allocate the outputs and zero them.
+    // NOTE(review): the transposes at the end appear to rewrite both outputs
+    // completely, while the scratch tensors handed to OpYoloBox are not
+    // cleared here -- confirm whether this fill is required as written.
+    boxes_out->mutable_data<T>(ctx.GetPlace());
+    scores_out->mutable_data<T>(ctx.GetPlace());
+    FillMLUTensorWithHostValue(ctx, static_cast<T>(0), boxes_out);
+    FillMLUTensorWithHostValue(ctx, static_cast<T>(0), scores_out);
+
+    // Input descriptors; ImgSize is described as int32.
+    MLUOpTensorDesc input_desc(
+        *input, MLUOP_LAYOUT_ARRAY, ToMluOpDataType<T>());
+    MLUOpTensorDesc image_size_desc(
+        *image_size, MLUOP_LAYOUT_ARRAY, ToMluOpDataType<int32_t>());
+
+    // Copy the anchors attribute into a 1-D int32 tensor for the kernel.
+    Tensor anchors_dev(framework::TransToPhiDataType(VT::INT32));
+    anchors_dev.Resize({anchor_len});
+    framework::TensorFromVector(
+        anchor_values, ctx.device_context(), &anchors_dev);
+    MLUOpTensorDesc anchors_dev_desc(anchors_dev);
+
+    // Destination descriptors for the two transposes.
+    MLUCnnlTensorDesc boxes_out_cnnl_desc(
+        4, paddle_boxes_shape.data(), ToCnnlDataType<T>());
+    MLUCnnlTensorDesc scores_out_cnnl_desc(
+        4, paddle_scores_shape.data(), ToCnnlDataType<T>());
+
+    MLUOP::OpYoloBox(ctx,
+                     input_desc.get(),
+                     GetBasePtr(input),
+                     image_size_desc.get(),
+                     GetBasePtr(image_size),
+                     anchors_dev_desc.get(),
+                     GetBasePtr(&anchors_dev),
+                     class_num,
+                     conf_thresh,
+                     downsample_ratio,
+                     clip_bbox,
+                     scale_x_y,
+                     iou_aware,
+                     iou_aware_factor,
+                     boxes_scratch_mluop_desc.get(),
+                     GetBasePtr(&boxes_scratch),
+                     scores_scratch_mluop_desc.get(),
+                     GetBasePtr(&scores_scratch));
+
+    // Swap the last two axes of both results.
+    const std::vector<int> swap_last_two = {0, 1, 3, 2};
+
+    // boxes: [N, S, 4, H*W] -> [N, S, H*W, 4]
+    MLUCnnl::Transpose(ctx,
+                       swap_last_two,
+                       4,
+                       boxes_scratch_cnnl_desc.get(),
+                       GetBasePtr(&boxes_scratch),
+                       boxes_out_cnnl_desc.get(),
+                       GetBasePtr(boxes_out));
+
+    // scores: [N, S, class_num, H*W] -> [N, S, H*W, class_num]
+    MLUCnnl::Transpose(ctx,
+                       swap_last_two,
+                       4,
+                       scores_scratch_cnnl_desc.get(),
+                       GetBasePtr(&scores_scratch),
+                       scores_out_cnnl_desc.get(),
+                       GetBasePtr(scores_out));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;  // NOTE(review): unused in this file.
+// Register the MLU yolo_box kernel; only the float instantiation is listed.
+REGISTER_OP_MLU_KERNEL(yolo_box, ops::YoloBoxMLUKernel<float>);
diff --git a/paddle/fluid/operators/mlu/mlu_baseop.cc b/paddle/fluid/operators/mlu/mlu_baseop.cc
@@ -5418,5 +5418,45 @@ MLURNNDesc::~MLURNNDesc() {
                                                               diff_x));
 }
 
+/* static */ void MLUOP::OpYoloBox(const ExecutionContext& ctx,
+                                   const mluOpTensorDescriptor_t x_desc,
+                                   const void* x,
+                                   const mluOpTensorDescriptor_t img_size_desc,
+                                   const void* img_size,
+                                   const mluOpTensorDescriptor_t anchors_desc,
+                                   const void* anchors,
+                                   const int class_num,
+                                   const float conf_thresh,
+                                   const int downsample_ratio,
+                                   const bool clip_bbox,
+                                   const float scale,
+                                   const bool iou_aware,
+                                   const float iou_aware_factor,
+                                   const mluOpTensorDescriptor_t boxes_desc,
+                                   void* boxes,
+                                   const mluOpTensorDescriptor_t scores_desc,
+                                   void* scores) {
+  mluOpHandle_t handle = GetMLUOpHandleFromCTX(ctx);  // handle comes from ctx
+  // Thin wrapper: pass every argument through to mluOpYoloBox unchanged.
+  PADDLE_ENFORCE_MLU_SUCCESS(mluOpYoloBox(handle,
+                                          x_desc,
+                                          x,
+                                          img_size_desc,
+                                          img_size,
+                                          anchors_desc,
+                                          anchors,
+                                          class_num,
+                                          conf_thresh,
+                                          downsample_ratio,
+                                          clip_bbox,
+                                          scale,
+                                          iou_aware,
+                                          iou_aware_factor,
+                                          boxes_desc,
+                                          boxes,
+                                          scores_desc,
+                                          scores));
+}  // MLUOP::OpYoloBox
+
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/mlu/mlu_baseop.h b/paddle/fluid/operators/mlu/mlu_baseop.h
@@ -2292,6 +2292,27 @@ class MLUCnnl {
       void* diff_x);
 };
 
+class MLUOP {  // static wrappers around mluOp library calls
+ public:
+  static void OpYoloBox(const ExecutionContext& ctx,  // source of mluOp handle
+                        const mluOpTensorDescriptor_t x_desc,
+                        const void* x,  // data described by x_desc
+                        const mluOpTensorDescriptor_t img_size_desc,
+                        const void* img_size,  // data for img_size_desc
+                        const mluOpTensorDescriptor_t anchors_desc,
+                        const void* anchors,  // data for anchors_desc
+                        const int class_num,
+                        const float conf_thresh,
+                        const int downsample_ratio,
+                        const bool clip_bbox,
+                        const float scale,
+                        const bool iou_aware,
+                        const float iou_aware_factor,
+                        const mluOpTensorDescriptor_t boxes_desc,
+                        void* boxes,  // non-const: presumably an output
+                        const mluOpTensorDescriptor_t scores_desc,
+                        void* scores);  // non-const: presumably an output
+};
 const std::map<const std::string, std::pair<std::vector<int>, std::vector<int>>>
     TransPermMap = {
         // trans_mode, (forward_perm, backward_perm)