From ae408f526e28f0fc9a9da54db65c5fee86f68900 Mon Sep 17 00:00:00 2001 From: co63oc Date: Wed, 17 Apr 2024 07:26:46 +0800 Subject: [PATCH 1/2] Fix --- paddle/fluid/framework/op_compatible_info.cc | 2 - .../inference/tensorrt/convert/CMakeLists.txt | 1 - .../tensorrt/convert/multiclass_nms_op.cc | 163 ----- .../fluid/operators/detection/CMakeLists.txt | 1 - .../operators/detection/multiclass_nms_op.cc | 630 ------------------ python/paddle/incubate/layers/__init__.py | 1 - python/paddle/incubate/layers/nn.py | 130 ---- test/legacy_test/test_detection.py | 21 - test/legacy_test/test_multiclass_nms_op.py | 249 +------ 9 files changed, 1 insertion(+), 1197 deletions(-) delete mode 100644 paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc delete mode 100644 paddle/fluid/operators/detection/multiclass_nms_op.cc diff --git a/paddle/fluid/framework/op_compatible_info.cc b/paddle/fluid/framework/op_compatible_info.cc index 203d177bba916..1a37422323283 100644 --- a/paddle/fluid/framework/op_compatible_info.cc +++ b/paddle/fluid/framework/op_compatible_info.cc @@ -92,8 +92,6 @@ void OpCompatibleMap::InitOpCompatibleMap() { OpCompatibleType::definite_not}; op_compatible_map_["match_matrix_tensor"] = {"1.6.0", OpCompatibleType::definite_not}; - op_compatible_map_["multiclass_nms2"] = {"1.6.0", - OpCompatibleType::definite_not}; op_compatible_map_["one_hot_v2"] = {"1.6.0", OpCompatibleType::definite_not}; op_compatible_map_["pull_box_sparse"] = {"1.6.0", OpCompatibleType::definite_not}; diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 7cc4201420cb2..48e7f8b87b54b 100755 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -55,7 +55,6 @@ list( arg_min_op.cc roi_align_op.cc affine_channel_op.cc - multiclass_nms_op.cc multiclass_nms3_op.cc nearest_interp_op.cc reshape_op.cc diff --git a/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc b/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc deleted file mode 100644 index e14ee099aa0f8..0000000000000 --- a/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc +++ /dev/null @@ -1,163 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" - -namespace paddle { -namespace inference { -namespace tensorrt { - -class MultiClassNMSOpConverter : public OpConverter { - public: - void operator()(const framework::proto::OpDesc& op, - const framework::Scope& scope, - bool test_mode) override { - VLOG(3) << "convert a multiclassNMS op to tensorrt plugin"; - - // for now, only work for static shape and regular tensor - framework::OpDesc op_desc(op, nullptr); - - std::string bboxes = op_desc.Input("BBoxes").front(); - std::string scores = op_desc.Input("Scores").front(); - std::string output_name = op_desc.Output("Out").front(); - - auto* bboxes_tensor = engine_->GetITensor(bboxes); - auto* scores_tensor = engine_->GetITensor(scores); - - int background_label = - PADDLE_GET_CONST(int, op_desc.GetAttr("background_label")); - float score_threshold = - PADDLE_GET_CONST(float, op_desc.GetAttr("score_threshold")); - int nms_top_k = PADDLE_GET_CONST(int, op_desc.GetAttr("nms_top_k")); - float nms_threshold = - PADDLE_GET_CONST(float, op_desc.GetAttr("nms_threshold")); - int keep_top_k = PADDLE_GET_CONST(int, op_desc.GetAttr("keep_top_k")); - bool normalized = PADDLE_GET_CONST(bool, op_desc.GetAttr("normalized")); - int class_index = engine_->with_dynamic_shape() ? 1 : 0; - int num_classes = scores_tensor->getDimensions().d[class_index]; - - auto bboxes_dims = bboxes_tensor->getDimensions(); - nvinfer1::IShuffleLayer* bboxes_expand_layer = nullptr; - nvinfer1::IShuffleLayer* scores_transpose_layer = nullptr; - if (engine_->with_dynamic_shape()) { - nvinfer1::Dims4 bboxes_expand_dims( - bboxes_dims.d[0], bboxes_dims.d[1], 1, bboxes_dims.d[2]); - bboxes_expand_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *bboxes_tensor); - bboxes_expand_layer->setReshapeDimensions(bboxes_expand_dims); - - nvinfer1::Permutation permutation{0, 2, 1}; - scores_transpose_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scores_tensor); - scores_transpose_layer->setFirstTranspose(permutation); - } else { - nvinfer1::Dims3 bboxes_expand_dims(bboxes_dims.d[0], 1, bboxes_dims.d[1]); - bboxes_expand_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *bboxes_tensor); - bboxes_expand_layer->setReshapeDimensions(bboxes_expand_dims); - - nvinfer1::Permutation permutation{1, 0}; - scores_transpose_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scores_tensor); - scores_transpose_layer->setFirstTranspose(permutation); - } - - std::vector batch_nms_inputs; - batch_nms_inputs.push_back(bboxes_expand_layer->getOutput(0)); - batch_nms_inputs.push_back(scores_transpose_layer->getOutput(0)); - - constexpr bool shareLocation = true; - constexpr bool clip_boxes = false; - - const std::vector fields{ - {"shareLocation", &shareLocation, nvinfer1::PluginFieldType::kINT32, 1}, - {"backgroundLabelId", - &background_label, - nvinfer1::PluginFieldType::kINT32, - 1}, - {"numClasses", &num_classes, nvinfer1::PluginFieldType::kINT32, 1}, - {"topK", &nms_top_k, nvinfer1::PluginFieldType::kINT32, 1}, - {"keepTopK", &keep_top_k, nvinfer1::PluginFieldType::kINT32, 1}, - {"scoreThreshold", - &score_threshold, - nvinfer1::PluginFieldType::kFLOAT32, - 1}, - {"iouThreshold", - &nms_threshold, - nvinfer1::PluginFieldType::kFLOAT32, - 1}, - {"isNormalized", &normalized, nvinfer1::PluginFieldType::kINT32, 1}, - {"clipBoxes", &clip_boxes, nvinfer1::PluginFieldType::kINT32, 1}, - }; - - nvinfer1::PluginFieldCollection* plugin_collections = - static_cast( - malloc(sizeof(*plugin_collections) + - fields.size() * 
sizeof(nvinfer1::PluginField))); - plugin_collections->nbFields = static_cast(fields.size()); - plugin_collections->fields = fields.data(); - - std::string nms_plugin_name = "BatchedNMS_TRT"; - if (engine_->with_dynamic_shape()) { - nms_plugin_name = "BatchedNMSDynamic_TRT"; - } - auto creator = - GetPluginRegistry()->getPluginCreator(nms_plugin_name.c_str(), "1"); - auto batch_nms_plugin = - creator->createPlugin(nms_plugin_name.c_str(), plugin_collections); - free(plugin_collections); - - auto batch_nms_layer = engine_->network()->addPluginV2( - batch_nms_inputs.data(), batch_nms_inputs.size(), *batch_nms_plugin); - auto nmsed_boxes = batch_nms_layer->getOutput(1); - auto nmsed_scores = batch_nms_layer->getOutput(2); - auto nmsed_classes = batch_nms_layer->getOutput(3); - - auto nmsed_scores_transpose_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *nmsed_scores); - auto nmsed_classes_reshape_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *nmsed_classes); - if (engine_->with_dynamic_shape()) { - nmsed_scores_transpose_layer->setReshapeDimensions( - nvinfer1::Dims3(bboxes_dims.d[0], keep_top_k, 1)); - - nmsed_classes_reshape_layer->setReshapeDimensions( - nvinfer1::Dims3(bboxes_dims.d[0], keep_top_k, 1)); - } else { - nmsed_scores_transpose_layer->setReshapeDimensions( - nvinfer1::Dims2(keep_top_k, 1)); - - nmsed_classes_reshape_layer->setReshapeDimensions( - nvinfer1::Dims2(keep_top_k, 1)); - } - - std::vector concat_inputs; - concat_inputs.push_back(nmsed_classes_reshape_layer->getOutput(0)); - concat_inputs.push_back(nmsed_scores_transpose_layer->getOutput(0)); - concat_inputs.push_back(nmsed_boxes); - - auto nms_concat_layer = TRT_ENGINE_ADD_LAYER( - engine_, Concatenation, concat_inputs.data(), concat_inputs.size()); - int axis_index = engine_->with_dynamic_shape() ? 1 : 0; - nms_concat_layer->setAxis(axis_index + 1); - - ReplenishLayerAndOutput( - nms_concat_layer, "multiclass_nms", {output_name}, test_mode); - } -}; - -} // namespace tensorrt -} // namespace inference -} // namespace paddle - -REGISTER_TRT_OP_CONVERTER(multiclass_nms, MultiClassNMSOpConverter); diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 9aa19af0ba809..90bc9ddc15284 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -38,7 +38,6 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc polygon_box_transform_op.cu) detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc) -detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS phi common) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc box_decoder_and_assign_op.cu) diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc deleted file mode 100644 index 73ec6caa61c27..0000000000000 --- a/paddle/fluid/operators/detection/multiclass_nms_op.cc +++ /dev/null @@ -1,630 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -limitations under the License. */ - -#include - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/infermeta/ternary.h" -#include "paddle/phi/kernels/funcs/detection/nms_util.h" - -namespace paddle { -namespace operators { - -inline std::vector GetNmsLodFromRoisNum( - const phi::DenseTensor* rois_num) { - std::vector rois_lod; - auto* rois_num_data = rois_num->data(); - rois_lod.push_back(static_cast(0)); - for (int i = 0; i < rois_num->numel(); ++i) { - rois_lod.push_back(rois_lod.back() + static_cast(rois_num_data[i])); - } - return rois_lod; -} - -class MultiClassNMSOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("BBoxes"), "Input", "BBoxes", "MultiClassNMS"); - OP_INOUT_CHECK(ctx->HasInput("Scores"), "Input", "Scores", "MultiClassNMS"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "MultiClassNMS"); - auto box_dims = ctx->GetInputDim("BBoxes"); - auto score_dims = ctx->GetInputDim("Scores"); - int score_size = static_cast(score_dims.size()); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(score_size == 2 || score_size == 3, - true, - phi::errors::InvalidArgument( - "The rank of Input(Scores) must be 2 or 3" - ". But received rank = %d", - score_size)); - PADDLE_ENFORCE_EQ( - box_dims.size(), - 3, - phi::errors::InvalidArgument("The rank of Input(BBoxes) must be 3" - ". But received rank = %d", - box_dims.size())); - if (score_size == 3) { - PADDLE_ENFORCE_EQ(box_dims[2] == 4 || box_dims[2] == 8 || - box_dims[2] == 16 || box_dims[2] == 24 || - box_dims[2] == 32, - true, - phi::errors::InvalidArgument( - "The last dimension of Input" - "(BBoxes) must be 4 or 8, " - "represents the layout of coordinate " - "[xmin, ymin, xmax, ymax] or " - "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or " - "8 points: [xi, yi] i= 1,2,...,8 or " - "12 points: [xi, yi] i= 1,2,...,12 or " - "16 points: [xi, yi] i= 1,2,...,16")); - PADDLE_ENFORCE_EQ( - box_dims[1], - score_dims[2], - phi::errors::InvalidArgument( - "The 2nd dimension of Input(BBoxes) must be equal to " - "last dimension of Input(Scores), which represents the " - "predicted bboxes." - "But received box_dims[1](%s) != socre_dims[2](%s)", - box_dims[1], - score_dims[2])); - } else { - PADDLE_ENFORCE_EQ(box_dims[2], - 4, - phi::errors::InvalidArgument( - "The last dimension of Input" - "(BBoxes) must be 4. But received dimension = %d", - box_dims[2])); - PADDLE_ENFORCE_EQ( - box_dims[1], - score_dims[1], - phi::errors::InvalidArgument( - "The 2nd dimension of Input" - "(BBoxes) must be equal to the 2nd dimension of Input(Scores). " - "But received box dimension = %d, score dimension = %d", - box_dims[1], - score_dims[1])); - } - } - // Here the box_dims[0] is not the real dimension of output. - // It will be rewritten in the computing kernel. 
- ctx->SetOutputDim("Out", {-1, box_dims[2] + 2}); - if (!ctx->IsRuntime()) { - ctx->SetLoDLevel("Out", std::max(ctx->GetLoDLevel("BBoxes"), 1)); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "Scores"), - platform::CPUPlace()); - } -}; - -template -void SliceOneClass(const platform::DeviceContext& ctx, - const phi::DenseTensor& items, - const int class_id, - phi::DenseTensor* one_class_item) { - T* item_data = one_class_item->mutable_data(ctx.GetPlace()); - const T* items_data = items.data(); - const int64_t num_item = items.dims()[0]; - const int class_num = static_cast(items.dims()[1]); - if (items.dims().size() == 3) { - int item_size = static_cast(items.dims()[2]); - for (int i = 0; i < num_item; ++i) { - std::memcpy(item_data + i * item_size, - items_data + i * class_num * item_size + class_id * item_size, - sizeof(T) * item_size); - } - } else { - for (int i = 0; i < num_item; ++i) { - item_data[i] = items_data[i * class_num + class_id]; - } - } -} - -template -class MultiClassNMSKernel : public framework::OpKernel { - public: - void NMSFast(const phi::DenseTensor& bbox, - const phi::DenseTensor& scores, - const T score_threshold, - const T nms_threshold, - const T eta, - const int64_t top_k, - std::vector* selected_indices, - const bool normalized) const { - // The total boxes for each instance. - int64_t num_boxes = bbox.dims()[0]; - // 4: [xmin ymin xmax ymax] - // 8: [x1 y1 x2 y2 x3 y3 x4 y4] - // 16, 24, or 32: [x1 y1 x2 y2 ... xn yn], n = 8, 12 or 16 - int64_t box_size = bbox.dims()[1]; - - std::vector scores_data(num_boxes); - std::copy_n(scores.data(), num_boxes, scores_data.begin()); - std::vector> sorted_indices; - phi::funcs::GetMaxScoreIndex( - scores_data, score_threshold, top_k, &sorted_indices); - - selected_indices->clear(); - T adaptive_threshold = nms_threshold; - const T* bbox_data = bbox.data(); - - while (!sorted_indices.empty()) { - const int idx = sorted_indices.front().second; - bool keep = true; - for (const auto kept_idx : *selected_indices) { - if (keep) { - T overlap = T(0.); - // 4: [xmin ymin xmax ymax] - if (box_size == 4) { - overlap = - phi::funcs::JaccardOverlap(bbox_data + idx * box_size, - bbox_data + kept_idx * box_size, - normalized); - } - // 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32 - if (box_size == 8 || box_size == 16 || box_size == 24 || - box_size == 32) { - overlap = phi::funcs::PolyIoU(bbox_data + idx * box_size, - bbox_data + kept_idx * box_size, - box_size, - normalized); - } - keep = overlap <= adaptive_threshold; - } else { - break; - } - } - if (keep) { - selected_indices->push_back(idx); - } - sorted_indices.erase(sorted_indices.begin()); - if (keep && eta < 1 && adaptive_threshold > 0.5) { - adaptive_threshold *= eta; - } - } - } - - void MultiClassNMS(const framework::ExecutionContext& ctx, - const phi::DenseTensor& scores, - const phi::DenseTensor& bboxes, - const int scores_size, - std::map>* indices, - int* num_nmsed_out) const { - int64_t background_label = ctx.Attr("background_label"); - int64_t nms_top_k = ctx.Attr("nms_top_k"); - int64_t keep_top_k = ctx.Attr("keep_top_k"); - bool normalized = ctx.Attr("normalized"); - T nms_threshold = static_cast(ctx.Attr("nms_threshold")); - T nms_eta = static_cast(ctx.Attr("nms_eta")); - T score_threshold = static_cast(ctx.Attr("score_threshold")); - auto& dev_ctx = ctx.template device_context(); - - int num_det = 0; - - int64_t class_num = 
scores_size == 3 ? scores.dims()[0] : scores.dims()[1]; - phi::DenseTensor bbox_slice, score_slice; - for (int64_t c = 0; c < class_num; ++c) { - if (c == background_label) continue; - if (scores_size == 3) { - score_slice = scores.Slice(c, c + 1); - bbox_slice = bboxes; - } else { - score_slice.Resize({scores.dims()[0], 1}); - bbox_slice.Resize({scores.dims()[0], 4}); - SliceOneClass(dev_ctx, scores, c, &score_slice); - SliceOneClass(dev_ctx, bboxes, c, &bbox_slice); - } - NMSFast(bbox_slice, - score_slice, - score_threshold, - nms_threshold, - nms_eta, - nms_top_k, - &((*indices)[c]), // NOLINT - normalized); - if (scores_size == 2) { - std::stable_sort((*indices)[c].begin(), (*indices)[c].end()); // NOLINT - } - num_det += (*indices)[c].size(); // NOLINT - } - - *num_nmsed_out = num_det; - const T* scores_data = scores.data(); - if (keep_top_k > -1 && num_det > keep_top_k) { - const T* sdata = nullptr; - std::vector>> score_index_pairs; - for (const auto& it : *indices) { - int label = it.first; - if (scores_size == 3) { - sdata = scores_data + label * scores.dims()[1]; - } else { - score_slice.Resize({scores.dims()[0], 1}); - SliceOneClass(dev_ctx, scores, label, &score_slice); - sdata = score_slice.data(); - } - const std::vector& label_indices = it.second; - for (auto idx : label_indices) { - score_index_pairs.push_back( - std::make_pair(sdata[idx], std::make_pair(label, idx))); - } - } - // Keep top k results per image. - std::stable_sort(score_index_pairs.begin(), - score_index_pairs.end(), - phi::funcs::SortScorePairDescend>); - score_index_pairs.resize(keep_top_k); - - // Store the new indices. - std::map> new_indices; - for (auto& score_index_pair : score_index_pairs) { - int label = score_index_pair.second.first; - int idx = score_index_pair.second.second; - new_indices[label].push_back(idx); - } - if (scores_size == 2) { - for (const auto& it : new_indices) { - int label = it.first; - std::stable_sort(new_indices[label].begin(), - new_indices[label].end()); - } - } - new_indices.swap(*indices); - *num_nmsed_out = keep_top_k; // NOLINT - } - } - - void MultiClassOutput(const platform::DeviceContext& ctx, - const phi::DenseTensor& scores, - const phi::DenseTensor& bboxes, - const std::map>& selected_indices, - const int scores_size, - phi::DenseTensor* outs, - int* oindices = nullptr, - const int offset = 0) const { - int64_t class_num = scores.dims()[1]; - int64_t predict_dim = scores.dims()[1]; - int64_t box_size = bboxes.dims()[1]; - if (scores_size == 2) { - box_size = bboxes.dims()[2]; - } - int64_t out_dim = box_size + 2; - auto* scores_data = scores.data(); - auto* bboxes_data = bboxes.data(); - auto* odata = outs->data(); - const T* sdata = nullptr; - phi::DenseTensor bbox; - bbox.Resize({scores.dims()[0], box_size}); - int count = 0; - for (const auto& it : selected_indices) { - int label = it.first; - const std::vector& indices = it.second; - if (scores_size == 2) { - SliceOneClass(ctx, bboxes, label, &bbox); - } else { - sdata = scores_data + label * predict_dim; - } - - for (auto idx : indices) { - odata[count * out_dim] = label; // label - const T* bdata = nullptr; - if (scores_size == 3) { - bdata = bboxes_data + idx * box_size; - odata[count * out_dim + 1] = sdata[idx]; // score - if (oindices != nullptr) { - oindices[count] = offset + idx; - } - } else { - bdata = bbox.data() + idx * box_size; - odata[count * out_dim + 1] = *(scores_data + idx * class_num + label); - if (oindices != nullptr) { - oindices[count] = - static_cast(offset + idx * class_num + label); - 
} - } - // xmin, ymin, xmax, ymax or multi-points coordinates - std::memcpy(odata + count * out_dim + 2, bdata, box_size * sizeof(T)); - count++; - } - } - } - - void Compute(const framework::ExecutionContext& ctx) const override { - auto* boxes = ctx.Input("BBoxes"); - auto* scores = ctx.Input("Scores"); - auto* outs = ctx.Output("Out"); - bool return_index = ctx.HasOutput("Index") ? true : false; - auto index = ctx.Output("Index"); - bool has_roisnum = ctx.HasInput("RoisNum") ? true : false; - auto rois_num = ctx.Input("RoisNum"); - auto score_dims = common::vectorize(scores->dims()); - auto score_size = score_dims.size(); - auto& dev_ctx = ctx.template device_context(); - - std::vector>> all_indices; - std::vector batch_starts = {0}; - int64_t batch_size = score_dims[0]; - int64_t box_dim = boxes->dims()[2]; - int64_t out_dim = box_dim + 2; - int num_nmsed_out = 0; - phi::DenseTensor boxes_slice, scores_slice; - int n = 0; - if (has_roisnum) { - n = static_cast(score_size == 3 ? batch_size : rois_num->numel()); - } else { - n = static_cast(score_size == 3 ? batch_size - : boxes->lod().back().size() - 1); - } - for (int i = 0; i < n; ++i) { - std::map> indices; - if (score_size == 3) { - scores_slice = scores->Slice(i, i + 1); - scores_slice.Resize({score_dims[1], score_dims[2]}); - boxes_slice = boxes->Slice(i, i + 1); - boxes_slice.Resize({score_dims[2], box_dim}); - } else { - std::vector boxes_lod; - if (has_roisnum) { - boxes_lod = GetNmsLodFromRoisNum(rois_num); - } else { - boxes_lod = boxes->lod().back(); - } - if (boxes_lod[i] == boxes_lod[i + 1]) { - all_indices.push_back(indices); - batch_starts.push_back(batch_starts.back()); - continue; - } - scores_slice = scores->Slice(static_cast(boxes_lod[i]), - static_cast(boxes_lod[i + 1])); - boxes_slice = boxes->Slice(static_cast(boxes_lod[i]), - static_cast(boxes_lod[i + 1])); - } - MultiClassNMS( - ctx, scores_slice, boxes_slice, score_size, &indices, &num_nmsed_out); - all_indices.push_back(indices); - batch_starts.push_back(batch_starts.back() + num_nmsed_out); - } - - int num_kept = static_cast(batch_starts.back()); - if (num_kept == 0) { - if (return_index) { - outs->mutable_data({0, out_dim}, ctx.GetPlace()); - index->mutable_data({0, 1}, ctx.GetPlace()); - } else { - T* od = outs->mutable_data({1, 1}, ctx.GetPlace()); - od[0] = -1; - batch_starts = {0, 1}; - } - } else { - outs->mutable_data({num_kept, out_dim}, ctx.GetPlace()); - int offset = 0; - int* oindices = nullptr; - for (int i = 0; i < n; ++i) { - if (score_size == 3) { - scores_slice = scores->Slice(i, i + 1); - boxes_slice = boxes->Slice(i, i + 1); - scores_slice.Resize({score_dims[1], score_dims[2]}); - boxes_slice.Resize({score_dims[2], box_dim}); - if (return_index) { - offset = i * score_dims[2]; - } - } else { - std::vector boxes_lod; - if (has_roisnum) { - boxes_lod = GetNmsLodFromRoisNum(rois_num); - } else { - boxes_lod = boxes->lod().back(); - } - if (boxes_lod[i] == boxes_lod[i + 1]) continue; - scores_slice = scores->Slice(static_cast(boxes_lod[i]), - static_cast(boxes_lod[i + 1])); - boxes_slice = boxes->Slice(static_cast(boxes_lod[i]), - static_cast(boxes_lod[i + 1])); - if (return_index) { - offset = static_cast(boxes_lod[i] * score_dims[1]); - } - } - - int64_t s = static_cast(batch_starts[i]); - int64_t e = static_cast(batch_starts[i + 1]); - if (e > s) { - phi::DenseTensor out = outs->Slice(s, e); - if (return_index) { - int* output_idx = - index->mutable_data({num_kept, 1}, ctx.GetPlace()); - oindices = output_idx + s; - } - 
MultiClassOutput(dev_ctx, - scores_slice, - boxes_slice, - all_indices[i], - score_dims.size(), - &out, - oindices, - offset); - } - } - } - if (ctx.HasOutput("NmsRoisNum")) { - auto* nms_rois_num = ctx.Output("NmsRoisNum"); - nms_rois_num->mutable_data({n}, ctx.GetPlace()); - int* num_data = nms_rois_num->data(); - for (int i = 1; i <= n; i++) { - num_data[i - 1] = - static_cast(batch_starts[i] - batch_starts[i - 1]); - } - nms_rois_num->Resize({n}); - } - - framework::LoD lod; - lod.emplace_back(batch_starts); - if (return_index) { - index->set_lod(lod); - } - outs->set_lod(lod); - } -}; - -class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("BBoxes", - "Two types of bboxes are supported:" - "1. (Tensor) A 3-D Tensor with shape " - "[N, M, 4 or 8 16 24 32] represents the " - "predicted locations of M bounding bboxes, N is the batch size. " - "Each bounding box has four coordinate values and the layout is " - "[xmin, ymin, xmax, ymax], when box size equals to 4." - "2. (phi::DenseTensor) A 3-D Tensor with shape [M, C, 4]" - "M is the number of bounding boxes, C is the class number"); - AddInput("Scores", - "Two types of scores are supported:" - "1. (Tensor) A 3-D Tensor with shape [N, C, M] represents the " - "predicted confidence predictions. N is the batch size, C is the " - "class number, M is number of bounding boxes. For each category " - "there are total M scores which corresponding M bounding boxes. " - " Please note, M is equal to the 2nd dimension of BBoxes. " - "2. (phi::DenseTensor) A 2-D phi::DenseTensor with shape [M, C]. " - "M is the number of bbox, C is the class number. In this case, " - "Input BBoxes should be the second case with shape [M, C, 4]."); - AddAttr( - "background_label", - "(int, default: 0) " - "The index of background label, the background label will be ignored. " - "If set to -1, then all categories will be considered.") - .SetDefault(0); - AddAttr("score_threshold", - "(float) " - "Threshold to filter out bounding boxes with low " - "confidence score. If not provided, consider all boxes."); - AddAttr("nms_top_k", - "(int64_t) " - "Maximum number of detections to be kept according to the " - "confidences after the filtering detections based on " - "score_threshold"); - AddAttr("nms_threshold", - "(float, default: 0.3) " - "The threshold to be used in NMS.") - .SetDefault(0.3); - AddAttr("nms_eta", - "(float) " - "The parameter for adaptive NMS.") - .SetDefault(1.0); - AddAttr("keep_top_k", - "(int64_t) " - "Number of total bboxes to be kept per image after NMS " - "step. -1 means keeping all bboxes after NMS step."); - AddAttr("normalized", - "(bool, default true) " - "Whether detections are normalized.") - .SetDefault(true); - AddOutput("Out", - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] " - "represents the " - "detections. Each row has 6 values: " - "[label, confidence, xmin, ymin, xmax, ymax] or " - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 10] " - "represents the " - "detections. Each row has 10 values: " - "[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the " - "total number of detections in this mini-batch." - "For each instance, " - "the offsets in first dimension are called LoD, the number of " - "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is " - "no detected bbox."); - AddComment(R"DOC( -This operator is to do multi-class non maximum suppression (NMS) on a batched -of boxes and scores. 
-In the NMS step, this operator greedily selects a subset of detection bounding -boxes that have high scores larger than score_threshold, if providing this -threshold, then selects the largest nms_top_k confidences scores if nms_top_k -is larger than -1. Then this operator prunes away boxes that have high IOU -(intersection over union) overlap with already selected boxes by adaptive -threshold NMS based on parameters of nms_threshold and nms_eta. -After NMS step, at most keep_top_k number of total bboxes are to be kept -per image if keep_top_k is larger than -1. -This operator support multi-class and batched inputs. It applying NMS -independently for each class. The outputs is a 2-D LoDTensor, for each -image, the offsets in first dimension of phi::DenseTensor are called LoD, the number -of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0, -means there is no detected bbox for this image. -)DOC"); - } -}; - -class MultiClassNMS2Op : public MultiClassNMSOp { - public: - MultiClassNMS2Op(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : MultiClassNMSOp(type, inputs, outputs, attrs) {} - - void InferShape(framework::InferShapeContext* ctx) const override { - MultiClassNMSOp::InferShape(ctx); - ctx->SetOutputDim("Index", {-1, 1}); - if (!ctx->IsRuntime()) { - ctx->SetLoDLevel("Index", std::max(ctx->GetLoDLevel("BBoxes"), 1)); - } - } -}; - -class MultiClassNMS2OpMaker : public MultiClassNMSOpMaker { - public: - void Make() override { - MultiClassNMSOpMaker::Make(); - AddOutput("Index", - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 1] " - "represents the " - "index of selected bbox. The index is the absolute index cross " - "batches.") - .AsIntermediate(); - } -}; - -template -class MultiClassNMS2Kernel : public MultiClassNMSKernel {}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - multiclass_nms, - ops::MultiClassNMSOp, - ops::MultiClassNMSOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL( - multiclass_nms, CPU, ALL_LAYOUT, ops::MultiClassNMSKernel, float, double) {} - -REGISTER_OPERATOR( - multiclass_nms2, - ops::MultiClassNMS2Op, - ops::MultiClassNMS2OpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(multiclass_nms2, - CPU, - ALL_LAYOUT, - ops::MultiClassNMS2Kernel, - float, - double) {} diff --git a/python/paddle/incubate/layers/__init__.py b/python/paddle/incubate/layers/__init__.py index 5430d1108cecb..0d1d840b4f6a4 100644 --- a/python/paddle/incubate/layers/__init__.py +++ b/python/paddle/incubate/layers/__init__.py @@ -22,7 +22,6 @@ fused_bn_add_act, fused_embedding_seq_pool, fused_seqpool_cvm, - multiclass_nms2, partial_concat, partial_sum, pow2_decay_with_linear_warmup, diff --git a/python/paddle/incubate/layers/nn.py b/python/paddle/incubate/layers/nn.py index aee7f2b9088de..01c7b38f84107 100644 --- a/python/paddle/incubate/layers/nn.py +++ b/python/paddle/incubate/layers/nn.py @@ -192,136 +192,6 @@ def fused_seqpool_cvm( return outs -def multiclass_nms2( - bboxes, - scores, - score_threshold, - nms_top_k, - keep_top_k, - nms_threshold=0.3, - normalized=True, - nms_eta=1.0, - background_label=0, - return_index=False, - name=None, -): - """ - **Multiclass NMS2** - - This operator is to do multi-class non maximum suppression (NMS) on - boxes and scores. 
- In the NMS step, this operator greedily selects a subset of detection bounding - boxes that have high scores larger than score_threshold, if providing this - threshold, then selects the largest nms_top_k confidences scores if nms_top_k - is larger than -1. Then this operator prunes away boxes that have high IOU - (intersection over union) overlap with already selected boxes by adaptive - threshold NMS based on parameters of nms_threshold and nms_eta. - After NMS step, at most keep_top_k number of total bboxes are to be kept - per image if keep_top_k is larger than -1. - - Args: - bboxes (Tensor): Two types of bboxes are supported: - 1. (Tensor) A 3-D Tensor with shape - [N, M, 4 or 8 16 24 32] represents the - predicted locations of M bounding bboxes, - N is the batch size. Each bounding box has four - coordinate values and the layout is - [xmin, ymin, xmax, ymax], when box size equals to 4. - 2. (LoDTensor) A 3-D Tensor with shape [M, C, 4] - M is the number of bounding boxes, C is the - class number. - scores (Tensor): Two types of scores are supported: - 1. (Tensor) A 3-D Tensor with shape [N, C, M] - represents the predicted confidence predictions. - N is the batch size, C is the class number, M is - number of bounding boxes. For each category there - are total M scores which corresponding M bounding - boxes. Please note, M is equal to the 2nd dimension - of BBoxes. - 2. (LoDTensor) A 2-D LoDTensor with shape [M, C]. - M is the number of bbox, C is the class number. - In this case, input BBoxes should be the second - case with shape [M, C, 4]. - score_threshold (float): Threshold to filter out bounding boxes with - low confidence score. If not provided, - consider all boxes. - nms_top_k (int): Maximum number of detections to be kept according to - the confidences after the filtering detections based - on score_threshold. - keep_top_k (int): Number of total bboxes to be kept per image after NMS - step. -1 means keeping all bboxes after NMS step. - nms_threshold (float, optional): The threshold to be used in NMS. Default: 0.3. - normalized (bool, optional): Whether detections are normalized. Default: True. - nms_eta (float, optional): The threshold to be used in NMS. Default: 1.0. - background_label (int, optional): The index of background label, the background - label will be ignored. If set to -1, then all - categories will be considered. Default: 0. - return_index(bool, optional): Whether return selected index. Default: False. - name(str, optional): Name of the multiclass nms op. Default: None. - - Returns: - A tuple with two dimensions of the tensor: (Out, Index) if return_index is True, - otherwise, a tuple with one dimension of the tensor(Out) is returned. - Out: A 2-D LoDTensor with shape [No, 6] represents the detections. - Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax] - or A 2-D LoDTensor with shape [No, 10] represents the detections. - Each row has 10 values: [label, confidence, x1, y1, x2, y2, x3, y3, - x4, y4]. No is the total number of detections. - If all images have not detected results, all elements in LoD will be - 0, and output tensor is empty (None). - Index: Only return when return_index is True. A 2-D LoDTensor with - shape [No, 1] represents the selected index which type is Integer. - The index is the absolute value cross batches. No is the same number - as Out. If the index is used to gather other attribute such as age, - one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where - N is the batch size and M is the number of boxes. 
- - - Examples: - .. code-block:: python - - >>> import paddle - >>> paddle.enable_static() - >>> boxes = paddle.static.data(name='bboxes', shape=[-1, 81, 4], - ... dtype='float32', lod_level=1) - >>> scores = paddle.static.data(name='scores', shape=[-1, 81], - ... dtype='float32', lod_level=1) - >>> out, index = paddle.incubate.layers.multiclass_nms2(bboxes=boxes, - ... scores=scores, - ... background_label=0, - ... score_threshold=0.5, - ... nms_top_k=400, - ... nms_threshold=0.3, - ... keep_top_k=200, - ... normalized=False, - ... return_index=True) - """ - helper = LayerHelper('multiclass_nms2', **locals()) - - output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) - index = helper.create_variable_for_type_inference(dtype='int') - helper.append_op( - type="multiclass_nms2", - inputs={'BBoxes': bboxes, 'Scores': scores}, - attrs={ - 'background_label': background_label, - 'score_threshold': score_threshold, - 'nms_top_k': nms_top_k, - 'nms_threshold': nms_threshold, - 'keep_top_k': keep_top_k, - 'nms_eta': nms_eta, - 'normalized': normalized, - }, - outputs={'Out': output, 'Index': index}, - ) - output.stop_gradient = True - index.stop_gradient = True - - if return_index: - return output, index - return output - - def search_pyramid_hash( input, num_emb, diff --git a/test/legacy_test/test_detection.py b/test/legacy_test/test_detection.py index e8819c9dc62bf..397b3adba5387 100644 --- a/test/legacy_test/test_detection.py +++ b/test/legacy_test/test_detection.py @@ -159,27 +159,6 @@ def test_generate_proposals(self): np.testing.assert_array_equal(np.array(rois_num_stat), rois_num_dy) -class TestMulticlassNMS2(unittest.TestCase): - def test_multiclass_nms2(self): - program = Program() - with program_guard(program): - bboxes = paddle.static.data( - name='bboxes', shape=[-1, 10, 4], dtype='float32' - ) - scores = paddle.static.data( - name='scores', shape=[-1, 10], dtype='float32' - ) - output = paddle.incubate.layers.multiclass_nms2( - bboxes, scores, 0.3, 400, 200, 0.7 - ) - output2, index = paddle.incubate.layers.multiclass_nms2( - bboxes, scores, 0.3, 400, 200, 0.7, return_index=True - ) - self.assertIsNotNone(output) - self.assertIsNotNone(output2) - self.assertIsNotNone(index) - - class TestDistributeFpnProposals(LayerTest): def static_distribute_fpn_proposals(self, rois_np, rois_num_np): with self.static_graph(): diff --git a/test/legacy_test/test_multiclass_nms_op.py b/test/legacy_test/test_multiclass_nms_op.py index 7262bf88f7a1a..bd2a29a359fa4 100644 --- a/test/legacy_test/test_multiclass_nms_op.py +++ b/test/legacy_test/test_multiclass_nms_op.py @@ -20,7 +20,6 @@ import paddle from paddle import _C_ops -from paddle.base import core from paddle.base.layer_helper import LayerHelper @@ -506,62 +505,6 @@ def test_check_output(self): self.check_output() -class TestMulticlassNMSNoBox(TestMulticlassNMSLoDInput): - def setUp(self): - self.set_argument() - M = 1200 - C = 21 - BOX_SIZE = 4 - box_lod = [[0, 1200, 0]] - background = 0 - nms_threshold = 0.3 - nms_top_k = 400 - keep_top_k = 200 - score_threshold = self.score_threshold - normalized = False - - scores = np.random.random((M, C)).astype('float32') - - scores = np.apply_along_axis(softmax, 1, scores) - - boxes = np.random.random((M, C, BOX_SIZE)).astype('float32') - boxes[:, :, 0] = boxes[:, :, 0] * 10 - boxes[:, :, 1] = boxes[:, :, 1] * 10 - boxes[:, :, 2] = boxes[:, :, 2] * 10 + 10 - boxes[:, :, 3] = boxes[:, :, 3] * 10 + 10 - - det_outs, lod = lod_multiclass_nms( - boxes, - scores, - background, - score_threshold, 
- nms_threshold, - nms_top_k, - keep_top_k, - box_lod, - normalized, - ) - det_outs = np.array(det_outs).astype('float32') - nmsed_outs = ( - det_outs[:, :-1].astype('float32') if len(det_outs) else det_outs - ) - self.op_type = 'multiclass_nms' - self.inputs = { - 'BBoxes': (boxes, box_lod), - 'Scores': (scores, box_lod), - } - self.outputs = {'Out': (nmsed_outs, [lod])} - self.attrs = { - 'background_label': 0, - 'nms_threshold': nms_threshold, - 'nms_top_k': nms_top_k, - 'keep_top_k': keep_top_k, - 'score_threshold': score_threshold, - 'nms_eta': 1.0, - 'normalized': normalized, - } - - class TestIOU(unittest.TestCase): def test_iou(self): box1 = np.array([4.0, 3.0, 7.0, 5.0]).astype('float32') @@ -574,153 +517,13 @@ def test_iou(self): class TestMulticlassNMS2Op(TestMulticlassNMSOp): def setUp(self): - self.set_argument() - N = 7 - M = 1200 - C = 21 - BOX_SIZE = 4 - background = 0 - nms_threshold = 0.3 - nms_top_k = 400 - keep_top_k = 200 - score_threshold = self.score_threshold - - scores = np.random.random((N * M, C)).astype('float32') - - scores = np.apply_along_axis(softmax, 1, scores) - scores = np.reshape(scores, (N, M, C)) - scores = np.transpose(scores, (0, 2, 1)) - - boxes = np.random.random((N, M, BOX_SIZE)).astype('float32') - boxes[:, :, 0:2] = boxes[:, :, 0:2] * 0.5 - boxes[:, :, 2:4] = boxes[:, :, 2:4] * 0.5 + 0.5 - - det_outs, lod = batched_multiclass_nms( - boxes, - scores, - background, - score_threshold, - nms_threshold, - nms_top_k, - keep_top_k, - ) - det_outs = np.array(det_outs) - - nmsed_outs = ( - det_outs[:, :-1].astype('float32') - if len(det_outs) - else np.array([], dtype=np.float32).reshape([0, BOX_SIZE + 2]) - ) - index_outs = ( - det_outs[:, -1:].astype('int') - if len(det_outs) - else np.array([], dtype='int').reshape([0, 1]) - ) - self.op_type = 'multiclass_nms2' - self.inputs = {'BBoxes': boxes, 'Scores': scores} - self.outputs = { - 'Out': (nmsed_outs, [lod]), - 'Index': (index_outs, [lod]), - } - self.attrs = { - 'background_label': 0, - 'nms_threshold': nms_threshold, - 'nms_top_k': nms_top_k, - 'keep_top_k': keep_top_k, - 'score_threshold': score_threshold, - 'nms_eta': 1.0, - 'normalized': True, - } - - def test_check_output(self): - self.check_output() - - -class TestMulticlassNMS2OpNoOutput(TestMulticlassNMS2Op): - def set_argument(self): - # Here set 2.0 to test the case there is no outputs. 
- # In practical use, 0.0 < score_threshold < 1.0 - self.score_threshold = 2.0 - - -class TestMulticlassNMS2LoDInput(TestMulticlassNMSLoDInput): - def setUp(self): - self.set_argument() - M = 1200 - C = 21 - BOX_SIZE = 4 - box_lod = [[1200]] - background = 0 - nms_threshold = 0.3 - nms_top_k = 400 - keep_top_k = 200 - score_threshold = self.score_threshold - normalized = False - - scores = np.random.random((M, C)).astype('float32') - - scores = np.apply_along_axis(softmax, 1, scores) - - boxes = np.random.random((M, C, BOX_SIZE)).astype('float32') - boxes[:, :, 0] = boxes[:, :, 0] * 10 - boxes[:, :, 1] = boxes[:, :, 1] * 10 - boxes[:, :, 2] = boxes[:, :, 2] * 10 + 10 - boxes[:, :, 3] = boxes[:, :, 3] * 10 + 10 - - det_outs, lod = lod_multiclass_nms( - boxes, - scores, - background, - score_threshold, - nms_threshold, - nms_top_k, - keep_top_k, - box_lod, - normalized, - ) - - det_outs = np.array(det_outs) - nmsed_outs = ( - det_outs[:, :-1].astype('float32') - if len(det_outs) - else np.array([], dtype=np.float32).reshape([0, BOX_SIZE + 2]) - ) - index_outs = ( - det_outs[:, -1:].astype('int') - if len(det_outs) - else np.array([], dtype='int').reshape([0, 1]) - ) - self.op_type = 'multiclass_nms2' - self.inputs = { - 'BBoxes': (boxes, box_lod), - 'Scores': (scores, box_lod), - } - self.outputs = { - 'Out': (nmsed_outs, [lod]), - 'Index': (index_outs, [lod]), - } - self.attrs = { - 'background_label': 0, - 'nms_threshold': nms_threshold, - 'nms_top_k': nms_top_k, - 'keep_top_k': keep_top_k, - 'score_threshold': score_threshold, - 'nms_eta': 1.0, - 'normalized': normalized, - } + pass def test_check_output(self): self.check_output() -class TestMulticlassNMS2LoDNoOutput(TestMulticlassNMS2LoDInput): - def set_argument(self): - # Here set 2.0 to test the case there is no outputs. - # In practical use, 0.0 < score_threshold < 1.0 - self.score_threshold = 2.0 - - class TestMulticlassNMS3Op(TestMulticlassNMS2Op): def setUp(self): self.python_api = multiclass_nms3 @@ -794,53 +597,3 @@ def set_argument(self): # Here set 2.0 to test the case there is no outputs. # In practical use, 0.0 < score_threshold < 1.0 self.score_threshold = 2.0 - - -@unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" -) -class TestMulticlassNMS3OpGPU(TestMulticlassNMS2Op): - def test_check_output(self): - place = paddle.CUDAPlace(0) - self.check_output_with_place(place) - - def set_argument(self): - self.score_threshold = 0.01 - self.gpu_logic = True - - -@unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" -) -class TestMulticlassNMS3OpGPULessOutput(TestMulticlassNMS3OpGPU): - def set_argument(self): - # Here set 0.08 to make output box size less than keep_top_k - self.score_threshold = 0.08 - self.gpu_logic = True - - -@unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" -) -class TestMulticlassNMS3OpGPUNoOutput(TestMulticlassNMS3OpGPU): - def set_argument(self): - # Here set 2.0 to test the case there is no outputs. 
- # In practical use, 0.0 < score_threshold < 1.0 - self.score_threshold = 2.0 - self.gpu_logic = True - - -@unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" -) -class TestMulticlassNMS3OpGPUFallback(TestMulticlassNMS3OpGPU): - def set_argument(self): - # Setting keep_top_k < 0 will fall back to CPU kernel - self.score_threshold = 0.01 - self.keep_top_k = -1 - self.gpu_logic = True - - -if __name__ == '__main__': - paddle.enable_static() - unittest.main() From 822f9e3a7006b788be0f27db211b50426667301a Mon Sep 17 00:00:00 2001 From: co63oc Date: Wed, 17 Apr 2024 13:36:31 +0800 Subject: [PATCH 2/2] Fix --- paddle/fluid/inference/api/analysis_predictor.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index d4a73175b3222..71b6603161232 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -3294,7 +3294,6 @@ USE_TRT_CONVERTER(arg_max); USE_TRT_CONVERTER(arg_min); USE_TRT_CONVERTER(roi_align); USE_TRT_CONVERTER(affine_channel); -USE_TRT_CONVERTER(multiclass_nms); USE_TRT_CONVERTER(multiclass_nms3); USE_TRT_CONVERTER(nearest_interp); USE_TRT_CONVERTER(nearest_interp_v2);
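Note on the removed behavior: the operator documentation and the multiclass_nms2 docstring deleted above both describe the same procedure — per-class greedy NMS gated by score_threshold and nms_top_k, an adaptive IoU threshold driven by nms_eta, then a global keep_top_k cut per image. The following is a minimal NumPy sketch of that procedure for reference only; it is not part of either patch, the parameter names simply mirror the op attributes, boxes are assumed to be [xmin, ymin, xmax, ymax], and the maintained in-tree implementation remains the multiclass_nms3 path that these changes keep.

    # Illustrative sketch of the multiclass NMS procedure described in the
    # removed operator's documentation. Not the Paddle implementation.
    import numpy as np

    def iou(box_a, box_b):
        """IoU of two [xmin, ymin, xmax, ymax] boxes."""
        ixmin = max(box_a[0], box_b[0]); iymin = max(box_a[1], box_b[1])
        ixmax = min(box_a[2], box_b[2]); iymax = min(box_a[3], box_b[3])
        inter = max(ixmax - ixmin, 0.0) * max(iymax - iymin, 0.0)
        area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
        area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
        union = area_a + area_b - inter
        return inter / union if union > 0 else 0.0

    def nms_one_class(boxes, scores, score_threshold, nms_top_k,
                      nms_threshold, nms_eta):
        """Greedy NMS for one class with the adaptive (nms_eta) threshold."""
        idx = np.argsort(-scores)                    # highest score first
        idx = idx[scores[idx] > score_threshold]     # drop low-confidence boxes
        if nms_top_k > -1:
            idx = idx[:nms_top_k]                    # pre-NMS top-k cut
        selected, adaptive = [], nms_threshold
        for i in idx:
            if all(iou(boxes[i], boxes[k]) <= adaptive for k in selected):
                selected.append(i)
                if nms_eta < 1.0 and adaptive > 0.5:
                    adaptive *= nms_eta              # adaptive threshold decay
        return selected

    def multiclass_nms(boxes, scores, background_label=0, score_threshold=0.05,
                       nms_top_k=400, nms_threshold=0.3, nms_eta=1.0,
                       keep_top_k=200):
        """boxes: [M, 4]; scores: [C, M]. Returns rows [label, score, x1, y1, x2, y2]."""
        dets = []
        for c in range(scores.shape[0]):
            if c == background_label:
                continue                             # background class is skipped
            for i in nms_one_class(boxes, scores[c], score_threshold,
                                   nms_top_k, nms_threshold, nms_eta):
                dets.append([float(c), float(scores[c, i]), *boxes[i].tolist()])
        dets.sort(key=lambda d: -d[1])               # rank all classes by score
        if keep_top_k > -1:
            dets = dets[:keep_top_k]                 # per-image keep_top_k cut
        return np.array(dets, dtype=np.float32).reshape(-1, 6)

    if __name__ == "__main__":
        rng = np.random.default_rng(0)
        boxes = rng.random((50, 4)).astype("float32")
        boxes[:, 2:] += boxes[:, :2]                 # ensure xmax >= xmin, ymax >= ymin
        scores = rng.random((5, 50)).astype("float32")
        print(multiclass_nms(boxes, scores).shape)   # (No, 6)

Callers migrating off multiclass_nms / multiclass_nms2 should move to the multiclass_nms3 operator exercised by the remaining tests, not to this sketch.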