From ae408f526e28f0fc9a9da54db65c5fee86f68900 Mon Sep 17 00:00:00 2001 From: co63oc Date: Wed, 17 Apr 2024 07:26:46 +0800 Subject: [PATCH 1/2] Fix --- paddle/fluid/framework/op_compatible_info.cc | 2 - .../inference/tensorrt/convert/CMakeLists.txt | 1 - .../tensorrt/convert/multiclass_nms_op.cc | 163 ----- .../fluid/operators/detection/CMakeLists.txt | 1 - .../operators/detection/multiclass_nms_op.cc | 630 ------------------ python/paddle/incubate/layers/__init__.py | 1 - python/paddle/incubate/layers/nn.py | 130 ---- test/legacy_test/test_detection.py | 21 - test/legacy_test/test_multiclass_nms_op.py | 249 +------ 9 files changed, 1 insertion(+), 1197 deletions(-) delete mode 100644 paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc delete mode 100644 paddle/fluid/operators/detection/multiclass_nms_op.cc diff --git a/paddle/fluid/framework/op_compatible_info.cc b/paddle/fluid/framework/op_compatible_info.cc index 203d177bba916..1a37422323283 100644 --- a/paddle/fluid/framework/op_compatible_info.cc +++ b/paddle/fluid/framework/op_compatible_info.cc @@ -92,8 +92,6 @@ void OpCompatibleMap::InitOpCompatibleMap() { OpCompatibleType::definite_not}; op_compatible_map_["match_matrix_tensor"] = {"1.6.0", OpCompatibleType::definite_not}; - op_compatible_map_["multiclass_nms2"] = {"1.6.0", - OpCompatibleType::definite_not}; op_compatible_map_["one_hot_v2"] = {"1.6.0", OpCompatibleType::definite_not}; op_compatible_map_["pull_box_sparse"] = {"1.6.0", OpCompatibleType::definite_not}; diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 7cc4201420cb2..48e7f8b87b54b 100755 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -55,7 +55,6 @@ list( arg_min_op.cc roi_align_op.cc affine_channel_op.cc - multiclass_nms_op.cc multiclass_nms3_op.cc nearest_interp_op.cc reshape_op.cc diff --git a/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc b/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc deleted file mode 100644 index e14ee099aa0f8..0000000000000 --- a/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc +++ /dev/null @@ -1,163 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" - -namespace paddle { -namespace inference { -namespace tensorrt { - -class MultiClassNMSOpConverter : public OpConverter { - public: - void operator()(const framework::proto::OpDesc& op, - const framework::Scope& scope, - bool test_mode) override { - VLOG(3) << "convert a multiclassNMS op to tensorrt plugin"; - - // for now, only work for static shape and regular tensor - framework::OpDesc op_desc(op, nullptr); - - std::string bboxes = op_desc.Input("BBoxes").front(); - std::string scores = op_desc.Input("Scores").front(); - std::string output_name = op_desc.Output("Out").front(); - - auto* bboxes_tensor = engine_->GetITensor(bboxes); - auto* scores_tensor = engine_->GetITensor(scores); - - int background_label = - PADDLE_GET_CONST(int, op_desc.GetAttr("background_label")); - float score_threshold = - PADDLE_GET_CONST(float, op_desc.GetAttr("score_threshold")); - int nms_top_k = PADDLE_GET_CONST(int, op_desc.GetAttr("nms_top_k")); - float nms_threshold = - PADDLE_GET_CONST(float, op_desc.GetAttr("nms_threshold")); - int keep_top_k = PADDLE_GET_CONST(int, op_desc.GetAttr("keep_top_k")); - bool normalized = PADDLE_GET_CONST(bool, op_desc.GetAttr("normalized")); - int class_index = engine_->with_dynamic_shape() ? 1 : 0; - int num_classes = scores_tensor->getDimensions().d[class_index]; - - auto bboxes_dims = bboxes_tensor->getDimensions(); - nvinfer1::IShuffleLayer* bboxes_expand_layer = nullptr; - nvinfer1::IShuffleLayer* scores_transpose_layer = nullptr; - if (engine_->with_dynamic_shape()) { - nvinfer1::Dims4 bboxes_expand_dims( - bboxes_dims.d[0], bboxes_dims.d[1], 1, bboxes_dims.d[2]); - bboxes_expand_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *bboxes_tensor); - bboxes_expand_layer->setReshapeDimensions(bboxes_expand_dims); - - nvinfer1::Permutation permutation{0, 2, 1}; - scores_transpose_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scores_tensor); - scores_transpose_layer->setFirstTranspose(permutation); - } else { - nvinfer1::Dims3 bboxes_expand_dims(bboxes_dims.d[0], 1, bboxes_dims.d[1]); - bboxes_expand_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *bboxes_tensor); - bboxes_expand_layer->setReshapeDimensions(bboxes_expand_dims); - - nvinfer1::Permutation permutation{1, 0}; - scores_transpose_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scores_tensor); - scores_transpose_layer->setFirstTranspose(permutation); - } - - std::vector batch_nms_inputs; - batch_nms_inputs.push_back(bboxes_expand_layer->getOutput(0)); - batch_nms_inputs.push_back(scores_transpose_layer->getOutput(0)); - - constexpr bool shareLocation = true; - constexpr bool clip_boxes = false; - - const std::vector fields{ - {"shareLocation", &shareLocation, nvinfer1::PluginFieldType::kINT32, 1}, - {"backgroundLabelId", - &background_label, - nvinfer1::PluginFieldType::kINT32, - 1}, - {"numClasses", &num_classes, nvinfer1::PluginFieldType::kINT32, 1}, - {"topK", &nms_top_k, nvinfer1::PluginFieldType::kINT32, 1}, - {"keepTopK", &keep_top_k, nvinfer1::PluginFieldType::kINT32, 1}, - {"scoreThreshold", - &score_threshold, - nvinfer1::PluginFieldType::kFLOAT32, - 1}, - {"iouThreshold", - &nms_threshold, - nvinfer1::PluginFieldType::kFLOAT32, - 1}, - {"isNormalized", &normalized, nvinfer1::PluginFieldType::kINT32, 1}, - {"clipBoxes", &clip_boxes, nvinfer1::PluginFieldType::kINT32, 1}, - }; - - nvinfer1::PluginFieldCollection* plugin_collections = - static_cast( - malloc(sizeof(*plugin_collections) + - fields.size() * 
sizeof(nvinfer1::PluginField))); - plugin_collections->nbFields = static_cast(fields.size()); - plugin_collections->fields = fields.data(); - - std::string nms_plugin_name = "BatchedNMS_TRT"; - if (engine_->with_dynamic_shape()) { - nms_plugin_name = "BatchedNMSDynamic_TRT"; - } - auto creator = - GetPluginRegistry()->getPluginCreator(nms_plugin_name.c_str(), "1"); - auto batch_nms_plugin = - creator->createPlugin(nms_plugin_name.c_str(), plugin_collections); - free(plugin_collections); - - auto batch_nms_layer = engine_->network()->addPluginV2( - batch_nms_inputs.data(), batch_nms_inputs.size(), *batch_nms_plugin); - auto nmsed_boxes = batch_nms_layer->getOutput(1); - auto nmsed_scores = batch_nms_layer->getOutput(2); - auto nmsed_classes = batch_nms_layer->getOutput(3); - - auto nmsed_scores_transpose_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *nmsed_scores); - auto nmsed_classes_reshape_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *nmsed_classes); - if (engine_->with_dynamic_shape()) { - nmsed_scores_transpose_layer->setReshapeDimensions( - nvinfer1::Dims3(bboxes_dims.d[0], keep_top_k, 1)); - - nmsed_classes_reshape_layer->setReshapeDimensions( - nvinfer1::Dims3(bboxes_dims.d[0], keep_top_k, 1)); - } else { - nmsed_scores_transpose_layer->setReshapeDimensions( - nvinfer1::Dims2(keep_top_k, 1)); - - nmsed_classes_reshape_layer->setReshapeDimensions( - nvinfer1::Dims2(keep_top_k, 1)); - } - - std::vector concat_inputs; - concat_inputs.push_back(nmsed_classes_reshape_layer->getOutput(0)); - concat_inputs.push_back(nmsed_scores_transpose_layer->getOutput(0)); - concat_inputs.push_back(nmsed_boxes); - - auto nms_concat_layer = TRT_ENGINE_ADD_LAYER( - engine_, Concatenation, concat_inputs.data(), concat_inputs.size()); - int axis_index = engine_->with_dynamic_shape() ? 1 : 0; - nms_concat_layer->setAxis(axis_index + 1); - - ReplenishLayerAndOutput( - nms_concat_layer, "multiclass_nms", {output_name}, test_mode); - } -}; - -} // namespace tensorrt -} // namespace inference -} // namespace paddle - -REGISTER_TRT_OP_CONVERTER(multiclass_nms, MultiClassNMSOpConverter); diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 9aa19af0ba809..90bc9ddc15284 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -38,7 +38,6 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc polygon_box_transform_op.cu) detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc) -detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS phi common) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc box_decoder_and_assign_op.cu) diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc deleted file mode 100644 index 73ec6caa61c27..0000000000000 --- a/paddle/fluid/operators/detection/multiclass_nms_op.cc +++ /dev/null @@ -1,630 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -limitations under the License. */ - -#include - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/infermeta/ternary.h" -#include "paddle/phi/kernels/funcs/detection/nms_util.h" - -namespace paddle { -namespace operators { - -inline std::vector GetNmsLodFromRoisNum( - const phi::DenseTensor* rois_num) { - std::vector rois_lod; - auto* rois_num_data = rois_num->data(); - rois_lod.push_back(static_cast(0)); - for (int i = 0; i < rois_num->numel(); ++i) { - rois_lod.push_back(rois_lod.back() + static_cast(rois_num_data[i])); - } - return rois_lod; -} - -class MultiClassNMSOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("BBoxes"), "Input", "BBoxes", "MultiClassNMS"); - OP_INOUT_CHECK(ctx->HasInput("Scores"), "Input", "Scores", "MultiClassNMS"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "MultiClassNMS"); - auto box_dims = ctx->GetInputDim("BBoxes"); - auto score_dims = ctx->GetInputDim("Scores"); - int score_size = static_cast(score_dims.size()); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(score_size == 2 || score_size == 3, - true, - phi::errors::InvalidArgument( - "The rank of Input(Scores) must be 2 or 3" - ". But received rank = %d", - score_size)); - PADDLE_ENFORCE_EQ( - box_dims.size(), - 3, - phi::errors::InvalidArgument("The rank of Input(BBoxes) must be 3" - ". But received rank = %d", - box_dims.size())); - if (score_size == 3) { - PADDLE_ENFORCE_EQ(box_dims[2] == 4 || box_dims[2] == 8 || - box_dims[2] == 16 || box_dims[2] == 24 || - box_dims[2] == 32, - true, - phi::errors::InvalidArgument( - "The last dimension of Input" - "(BBoxes) must be 4 or 8, " - "represents the layout of coordinate " - "[xmin, ymin, xmax, ymax] or " - "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or " - "8 points: [xi, yi] i= 1,2,...,8 or " - "12 points: [xi, yi] i= 1,2,...,12 or " - "16 points: [xi, yi] i= 1,2,...,16")); - PADDLE_ENFORCE_EQ( - box_dims[1], - score_dims[2], - phi::errors::InvalidArgument( - "The 2nd dimension of Input(BBoxes) must be equal to " - "last dimension of Input(Scores), which represents the " - "predicted bboxes." - "But received box_dims[1](%s) != socre_dims[2](%s)", - box_dims[1], - score_dims[2])); - } else { - PADDLE_ENFORCE_EQ(box_dims[2], - 4, - phi::errors::InvalidArgument( - "The last dimension of Input" - "(BBoxes) must be 4. But received dimension = %d", - box_dims[2])); - PADDLE_ENFORCE_EQ( - box_dims[1], - score_dims[1], - phi::errors::InvalidArgument( - "The 2nd dimension of Input" - "(BBoxes) must be equal to the 2nd dimension of Input(Scores). " - "But received box dimension = %d, score dimension = %d", - box_dims[1], - score_dims[1])); - } - } - // Here the box_dims[0] is not the real dimension of output. - // It will be rewritten in the computing kernel. 
- ctx->SetOutputDim("Out", {-1, box_dims[2] + 2}); - if (!ctx->IsRuntime()) { - ctx->SetLoDLevel("Out", std::max(ctx->GetLoDLevel("BBoxes"), 1)); - } - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey( - OperatorWithKernel::IndicateVarDataType(ctx, "Scores"), - platform::CPUPlace()); - } -}; - -template -void SliceOneClass(const platform::DeviceContext& ctx, - const phi::DenseTensor& items, - const int class_id, - phi::DenseTensor* one_class_item) { - T* item_data = one_class_item->mutable_data(ctx.GetPlace()); - const T* items_data = items.data(); - const int64_t num_item = items.dims()[0]; - const int class_num = static_cast(items.dims()[1]); - if (items.dims().size() == 3) { - int item_size = static_cast(items.dims()[2]); - for (int i = 0; i < num_item; ++i) { - std::memcpy(item_data + i * item_size, - items_data + i * class_num * item_size + class_id * item_size, - sizeof(T) * item_size); - } - } else { - for (int i = 0; i < num_item; ++i) { - item_data[i] = items_data[i * class_num + class_id]; - } - } -} - -template -class MultiClassNMSKernel : public framework::OpKernel { - public: - void NMSFast(const phi::DenseTensor& bbox, - const phi::DenseTensor& scores, - const T score_threshold, - const T nms_threshold, - const T eta, - const int64_t top_k, - std::vector* selected_indices, - const bool normalized) const { - // The total boxes for each instance. - int64_t num_boxes = bbox.dims()[0]; - // 4: [xmin ymin xmax ymax] - // 8: [x1 y1 x2 y2 x3 y3 x4 y4] - // 16, 24, or 32: [x1 y1 x2 y2 ... xn yn], n = 8, 12 or 16 - int64_t box_size = bbox.dims()[1]; - - std::vector scores_data(num_boxes); - std::copy_n(scores.data(), num_boxes, scores_data.begin()); - std::vector> sorted_indices; - phi::funcs::GetMaxScoreIndex( - scores_data, score_threshold, top_k, &sorted_indices); - - selected_indices->clear(); - T adaptive_threshold = nms_threshold; - const T* bbox_data = bbox.data(); - - while (!sorted_indices.empty()) { - const int idx = sorted_indices.front().second; - bool keep = true; - for (const auto kept_idx : *selected_indices) { - if (keep) { - T overlap = T(0.); - // 4: [xmin ymin xmax ymax] - if (box_size == 4) { - overlap = - phi::funcs::JaccardOverlap(bbox_data + idx * box_size, - bbox_data + kept_idx * box_size, - normalized); - } - // 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32 - if (box_size == 8 || box_size == 16 || box_size == 24 || - box_size == 32) { - overlap = phi::funcs::PolyIoU(bbox_data + idx * box_size, - bbox_data + kept_idx * box_size, - box_size, - normalized); - } - keep = overlap <= adaptive_threshold; - } else { - break; - } - } - if (keep) { - selected_indices->push_back(idx); - } - sorted_indices.erase(sorted_indices.begin()); - if (keep && eta < 1 && adaptive_threshold > 0.5) { - adaptive_threshold *= eta; - } - } - } - - void MultiClassNMS(const framework::ExecutionContext& ctx, - const phi::DenseTensor& scores, - const phi::DenseTensor& bboxes, - const int scores_size, - std::map>* indices, - int* num_nmsed_out) const { - int64_t background_label = ctx.Attr("background_label"); - int64_t nms_top_k = ctx.Attr("nms_top_k"); - int64_t keep_top_k = ctx.Attr("keep_top_k"); - bool normalized = ctx.Attr("normalized"); - T nms_threshold = static_cast(ctx.Attr("nms_threshold")); - T nms_eta = static_cast(ctx.Attr("nms_eta")); - T score_threshold = static_cast(ctx.Attr("score_threshold")); - auto& dev_ctx = ctx.template device_context(); - - int num_det = 0; - - int64_t class_num = 
scores_size == 3 ? scores.dims()[0] : scores.dims()[1]; - phi::DenseTensor bbox_slice, score_slice; - for (int64_t c = 0; c < class_num; ++c) { - if (c == background_label) continue; - if (scores_size == 3) { - score_slice = scores.Slice(c, c + 1); - bbox_slice = bboxes; - } else { - score_slice.Resize({scores.dims()[0], 1}); - bbox_slice.Resize({scores.dims()[0], 4}); - SliceOneClass(dev_ctx, scores, c, &score_slice); - SliceOneClass(dev_ctx, bboxes, c, &bbox_slice); - } - NMSFast(bbox_slice, - score_slice, - score_threshold, - nms_threshold, - nms_eta, - nms_top_k, - &((*indices)[c]), // NOLINT - normalized); - if (scores_size == 2) { - std::stable_sort((*indices)[c].begin(), (*indices)[c].end()); // NOLINT - } - num_det += (*indices)[c].size(); // NOLINT - } - - *num_nmsed_out = num_det; - const T* scores_data = scores.data(); - if (keep_top_k > -1 && num_det > keep_top_k) { - const T* sdata = nullptr; - std::vector>> score_index_pairs; - for (const auto& it : *indices) { - int label = it.first; - if (scores_size == 3) { - sdata = scores_data + label * scores.dims()[1]; - } else { - score_slice.Resize({scores.dims()[0], 1}); - SliceOneClass(dev_ctx, scores, label, &score_slice); - sdata = score_slice.data(); - } - const std::vector& label_indices = it.second; - for (auto idx : label_indices) { - score_index_pairs.push_back( - std::make_pair(sdata[idx], std::make_pair(label, idx))); - } - } - // Keep top k results per image. - std::stable_sort(score_index_pairs.begin(), - score_index_pairs.end(), - phi::funcs::SortScorePairDescend>); - score_index_pairs.resize(keep_top_k); - - // Store the new indices. - std::map> new_indices; - for (auto& score_index_pair : score_index_pairs) { - int label = score_index_pair.second.first; - int idx = score_index_pair.second.second; - new_indices[label].push_back(idx); - } - if (scores_size == 2) { - for (const auto& it : new_indices) { - int label = it.first; - std::stable_sort(new_indices[label].begin(), - new_indices[label].end()); - } - } - new_indices.swap(*indices); - *num_nmsed_out = keep_top_k; // NOLINT - } - } - - void MultiClassOutput(const platform::DeviceContext& ctx, - const phi::DenseTensor& scores, - const phi::DenseTensor& bboxes, - const std::map>& selected_indices, - const int scores_size, - phi::DenseTensor* outs, - int* oindices = nullptr, - const int offset = 0) const { - int64_t class_num = scores.dims()[1]; - int64_t predict_dim = scores.dims()[1]; - int64_t box_size = bboxes.dims()[1]; - if (scores_size == 2) { - box_size = bboxes.dims()[2]; - } - int64_t out_dim = box_size + 2; - auto* scores_data = scores.data(); - auto* bboxes_data = bboxes.data(); - auto* odata = outs->data(); - const T* sdata = nullptr; - phi::DenseTensor bbox; - bbox.Resize({scores.dims()[0], box_size}); - int count = 0; - for (const auto& it : selected_indices) { - int label = it.first; - const std::vector& indices = it.second; - if (scores_size == 2) { - SliceOneClass(ctx, bboxes, label, &bbox); - } else { - sdata = scores_data + label * predict_dim; - } - - for (auto idx : indices) { - odata[count * out_dim] = label; // label - const T* bdata = nullptr; - if (scores_size == 3) { - bdata = bboxes_data + idx * box_size; - odata[count * out_dim + 1] = sdata[idx]; // score - if (oindices != nullptr) { - oindices[count] = offset + idx; - } - } else { - bdata = bbox.data() + idx * box_size; - odata[count * out_dim + 1] = *(scores_data + idx * class_num + label); - if (oindices != nullptr) { - oindices[count] = - static_cast(offset + idx * class_num + label); - 
} - } - // xmin, ymin, xmax, ymax or multi-points coordinates - std::memcpy(odata + count * out_dim + 2, bdata, box_size * sizeof(T)); - count++; - } - } - } - - void Compute(const framework::ExecutionContext& ctx) const override { - auto* boxes = ctx.Input("BBoxes"); - auto* scores = ctx.Input("Scores"); - auto* outs = ctx.Output("Out"); - bool return_index = ctx.HasOutput("Index") ? true : false; - auto index = ctx.Output("Index"); - bool has_roisnum = ctx.HasInput("RoisNum") ? true : false; - auto rois_num = ctx.Input("RoisNum"); - auto score_dims = common::vectorize(scores->dims()); - auto score_size = score_dims.size(); - auto& dev_ctx = ctx.template device_context(); - - std::vector>> all_indices; - std::vector batch_starts = {0}; - int64_t batch_size = score_dims[0]; - int64_t box_dim = boxes->dims()[2]; - int64_t out_dim = box_dim + 2; - int num_nmsed_out = 0; - phi::DenseTensor boxes_slice, scores_slice; - int n = 0; - if (has_roisnum) { - n = static_cast(score_size == 3 ? batch_size : rois_num->numel()); - } else { - n = static_cast(score_size == 3 ? batch_size - : boxes->lod().back().size() - 1); - } - for (int i = 0; i < n; ++i) { - std::map> indices; - if (score_size == 3) { - scores_slice = scores->Slice(i, i + 1); - scores_slice.Resize({score_dims[1], score_dims[2]}); - boxes_slice = boxes->Slice(i, i + 1); - boxes_slice.Resize({score_dims[2], box_dim}); - } else { - std::vector boxes_lod; - if (has_roisnum) { - boxes_lod = GetNmsLodFromRoisNum(rois_num); - } else { - boxes_lod = boxes->lod().back(); - } - if (boxes_lod[i] == boxes_lod[i + 1]) { - all_indices.push_back(indices); - batch_starts.push_back(batch_starts.back()); - continue; - } - scores_slice = scores->Slice(static_cast(boxes_lod[i]), - static_cast(boxes_lod[i + 1])); - boxes_slice = boxes->Slice(static_cast(boxes_lod[i]), - static_cast(boxes_lod[i + 1])); - } - MultiClassNMS( - ctx, scores_slice, boxes_slice, score_size, &indices, &num_nmsed_out); - all_indices.push_back(indices); - batch_starts.push_back(batch_starts.back() + num_nmsed_out); - } - - int num_kept = static_cast(batch_starts.back()); - if (num_kept == 0) { - if (return_index) { - outs->mutable_data({0, out_dim}, ctx.GetPlace()); - index->mutable_data({0, 1}, ctx.GetPlace()); - } else { - T* od = outs->mutable_data({1, 1}, ctx.GetPlace()); - od[0] = -1; - batch_starts = {0, 1}; - } - } else { - outs->mutable_data({num_kept, out_dim}, ctx.GetPlace()); - int offset = 0; - int* oindices = nullptr; - for (int i = 0; i < n; ++i) { - if (score_size == 3) { - scores_slice = scores->Slice(i, i + 1); - boxes_slice = boxes->Slice(i, i + 1); - scores_slice.Resize({score_dims[1], score_dims[2]}); - boxes_slice.Resize({score_dims[2], box_dim}); - if (return_index) { - offset = i * score_dims[2]; - } - } else { - std::vector boxes_lod; - if (has_roisnum) { - boxes_lod = GetNmsLodFromRoisNum(rois_num); - } else { - boxes_lod = boxes->lod().back(); - } - if (boxes_lod[i] == boxes_lod[i + 1]) continue; - scores_slice = scores->Slice(static_cast(boxes_lod[i]), - static_cast(boxes_lod[i + 1])); - boxes_slice = boxes->Slice(static_cast(boxes_lod[i]), - static_cast(boxes_lod[i + 1])); - if (return_index) { - offset = static_cast(boxes_lod[i] * score_dims[1]); - } - } - - int64_t s = static_cast(batch_starts[i]); - int64_t e = static_cast(batch_starts[i + 1]); - if (e > s) { - phi::DenseTensor out = outs->Slice(s, e); - if (return_index) { - int* output_idx = - index->mutable_data({num_kept, 1}, ctx.GetPlace()); - oindices = output_idx + s; - } - 
MultiClassOutput(dev_ctx, - scores_slice, - boxes_slice, - all_indices[i], - score_dims.size(), - &out, - oindices, - offset); - } - } - } - if (ctx.HasOutput("NmsRoisNum")) { - auto* nms_rois_num = ctx.Output("NmsRoisNum"); - nms_rois_num->mutable_data({n}, ctx.GetPlace()); - int* num_data = nms_rois_num->data(); - for (int i = 1; i <= n; i++) { - num_data[i - 1] = - static_cast(batch_starts[i] - batch_starts[i - 1]); - } - nms_rois_num->Resize({n}); - } - - framework::LoD lod; - lod.emplace_back(batch_starts); - if (return_index) { - index->set_lod(lod); - } - outs->set_lod(lod); - } -}; - -class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("BBoxes", - "Two types of bboxes are supported:" - "1. (Tensor) A 3-D Tensor with shape " - "[N, M, 4 or 8 16 24 32] represents the " - "predicted locations of M bounding bboxes, N is the batch size. " - "Each bounding box has four coordinate values and the layout is " - "[xmin, ymin, xmax, ymax], when box size equals to 4." - "2. (phi::DenseTensor) A 3-D Tensor with shape [M, C, 4]" - "M is the number of bounding boxes, C is the class number"); - AddInput("Scores", - "Two types of scores are supported:" - "1. (Tensor) A 3-D Tensor with shape [N, C, M] represents the " - "predicted confidence predictions. N is the batch size, C is the " - "class number, M is number of bounding boxes. For each category " - "there are total M scores which corresponding M bounding boxes. " - " Please note, M is equal to the 2nd dimension of BBoxes. " - "2. (phi::DenseTensor) A 2-D phi::DenseTensor with shape [M, C]. " - "M is the number of bbox, C is the class number. In this case, " - "Input BBoxes should be the second case with shape [M, C, 4]."); - AddAttr( - "background_label", - "(int, default: 0) " - "The index of background label, the background label will be ignored. " - "If set to -1, then all categories will be considered.") - .SetDefault(0); - AddAttr("score_threshold", - "(float) " - "Threshold to filter out bounding boxes with low " - "confidence score. If not provided, consider all boxes."); - AddAttr("nms_top_k", - "(int64_t) " - "Maximum number of detections to be kept according to the " - "confidences after the filtering detections based on " - "score_threshold"); - AddAttr("nms_threshold", - "(float, default: 0.3) " - "The threshold to be used in NMS.") - .SetDefault(0.3); - AddAttr("nms_eta", - "(float) " - "The parameter for adaptive NMS.") - .SetDefault(1.0); - AddAttr("keep_top_k", - "(int64_t) " - "Number of total bboxes to be kept per image after NMS " - "step. -1 means keeping all bboxes after NMS step."); - AddAttr("normalized", - "(bool, default true) " - "Whether detections are normalized.") - .SetDefault(true); - AddOutput("Out", - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] " - "represents the " - "detections. Each row has 6 values: " - "[label, confidence, xmin, ymin, xmax, ymax] or " - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 10] " - "represents the " - "detections. Each row has 10 values: " - "[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the " - "total number of detections in this mini-batch." - "For each instance, " - "the offsets in first dimension are called LoD, the number of " - "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is " - "no detected bbox."); - AddComment(R"DOC( -This operator is to do multi-class non maximum suppression (NMS) on a batched -of boxes and scores. 
-In the NMS step, this operator greedily selects a subset of detection bounding -boxes that have high scores larger than score_threshold, if providing this -threshold, then selects the largest nms_top_k confidences scores if nms_top_k -is larger than -1. Then this operator prunes away boxes that have high IOU -(intersection over union) overlap with already selected boxes by adaptive -threshold NMS based on parameters of nms_threshold and nms_eta. -After NMS step, at most keep_top_k number of total bboxes are to be kept -per image if keep_top_k is larger than -1. -This operator support multi-class and batched inputs. It applying NMS -independently for each class. The outputs is a 2-D LoDTensor, for each -image, the offsets in first dimension of phi::DenseTensor are called LoD, the number -of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0, -means there is no detected bbox for this image. -)DOC"); - } -}; - -class MultiClassNMS2Op : public MultiClassNMSOp { - public: - MultiClassNMS2Op(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : MultiClassNMSOp(type, inputs, outputs, attrs) {} - - void InferShape(framework::InferShapeContext* ctx) const override { - MultiClassNMSOp::InferShape(ctx); - ctx->SetOutputDim("Index", {-1, 1}); - if (!ctx->IsRuntime()) { - ctx->SetLoDLevel("Index", std::max(ctx->GetLoDLevel("BBoxes"), 1)); - } - } -}; - -class MultiClassNMS2OpMaker : public MultiClassNMSOpMaker { - public: - void Make() override { - MultiClassNMSOpMaker::Make(); - AddOutput("Index", - "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 1] " - "represents the " - "index of selected bbox. The index is the absolute index cross " - "batches.") - .AsIntermediate(); - } -}; - -template -class MultiClassNMS2Kernel : public MultiClassNMSKernel {}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR( - multiclass_nms, - ops::MultiClassNMSOp, - ops::MultiClassNMSOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL( - multiclass_nms, CPU, ALL_LAYOUT, ops::MultiClassNMSKernel, float, double) {} - -REGISTER_OPERATOR( - multiclass_nms2, - ops::MultiClassNMS2Op, - ops::MultiClassNMS2OpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -PD_REGISTER_STRUCT_KERNEL(multiclass_nms2, - CPU, - ALL_LAYOUT, - ops::MultiClassNMS2Kernel, - float, - double) {} diff --git a/python/paddle/incubate/layers/__init__.py b/python/paddle/incubate/layers/__init__.py index 5430d1108cecb..0d1d840b4f6a4 100644 --- a/python/paddle/incubate/layers/__init__.py +++ b/python/paddle/incubate/layers/__init__.py @@ -22,7 +22,6 @@ fused_bn_add_act, fused_embedding_seq_pool, fused_seqpool_cvm, - multiclass_nms2, partial_concat, partial_sum, pow2_decay_with_linear_warmup, diff --git a/python/paddle/incubate/layers/nn.py b/python/paddle/incubate/layers/nn.py index aee7f2b9088de..01c7b38f84107 100644 --- a/python/paddle/incubate/layers/nn.py +++ b/python/paddle/incubate/layers/nn.py @@ -192,136 +192,6 @@ def fused_seqpool_cvm( return outs -def multiclass_nms2( - bboxes, - scores, - score_threshold, - nms_top_k, - keep_top_k, - nms_threshold=0.3, - normalized=True, - nms_eta=1.0, - background_label=0, - return_index=False, - name=None, -): - """ - **Multiclass NMS2** - - This operator is to do multi-class non maximum suppression (NMS) on - boxes and scores. 
- In the NMS step, this operator greedily selects a subset of detection bounding - boxes that have high scores larger than score_threshold, if providing this - threshold, then selects the largest nms_top_k confidences scores if nms_top_k - is larger than -1. Then this operator prunes away boxes that have high IOU - (intersection over union) overlap with already selected boxes by adaptive - threshold NMS based on parameters of nms_threshold and nms_eta. - After NMS step, at most keep_top_k number of total bboxes are to be kept - per image if keep_top_k is larger than -1. - - Args: - bboxes (Tensor): Two types of bboxes are supported: - 1. (Tensor) A 3-D Tensor with shape - [N, M, 4 or 8 16 24 32] represents the - predicted locations of M bounding bboxes, - N is the batch size. Each bounding box has four - coordinate values and the layout is - [xmin, ymin, xmax, ymax], when box size equals to 4. - 2. (LoDTensor) A 3-D Tensor with shape [M, C, 4] - M is the number of bounding boxes, C is the - class number. - scores (Tensor): Two types of scores are supported: - 1. (Tensor) A 3-D Tensor with shape [N, C, M] - represents the predicted confidence predictions. - N is the batch size, C is the class number, M is - number of bounding boxes. For each category there - are total M scores which corresponding M bounding - boxes. Please note, M is equal to the 2nd dimension - of BBoxes. - 2. (LoDTensor) A 2-D LoDTensor with shape [M, C]. - M is the number of bbox, C is the class number. - In this case, input BBoxes should be the second - case with shape [M, C, 4]. - score_threshold (float): Threshold to filter out bounding boxes with - low confidence score. If not provided, - consider all boxes. - nms_top_k (int): Maximum number of detections to be kept according to - the confidences after the filtering detections based - on score_threshold. - keep_top_k (int): Number of total bboxes to be kept per image after NMS - step. -1 means keeping all bboxes after NMS step. - nms_threshold (float, optional): The threshold to be used in NMS. Default: 0.3. - normalized (bool, optional): Whether detections are normalized. Default: True. - nms_eta (float, optional): The threshold to be used in NMS. Default: 1.0. - background_label (int, optional): The index of background label, the background - label will be ignored. If set to -1, then all - categories will be considered. Default: 0. - return_index(bool, optional): Whether return selected index. Default: False. - name(str, optional): Name of the multiclass nms op. Default: None. - - Returns: - A tuple with two dimensions of the tensor: (Out, Index) if return_index is True, - otherwise, a tuple with one dimension of the tensor(Out) is returned. - Out: A 2-D LoDTensor with shape [No, 6] represents the detections. - Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax] - or A 2-D LoDTensor with shape [No, 10] represents the detections. - Each row has 10 values: [label, confidence, x1, y1, x2, y2, x3, y3, - x4, y4]. No is the total number of detections. - If all images have not detected results, all elements in LoD will be - 0, and output tensor is empty (None). - Index: Only return when return_index is True. A 2-D LoDTensor with - shape [No, 1] represents the selected index which type is Integer. - The index is the absolute value cross batches. No is the same number - as Out. If the index is used to gather other attribute such as age, - one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where - N is the batch size and M is the number of boxes. 
- - - Examples: - .. code-block:: python - - >>> import paddle - >>> paddle.enable_static() - >>> boxes = paddle.static.data(name='bboxes', shape=[-1, 81, 4], - ... dtype='float32', lod_level=1) - >>> scores = paddle.static.data(name='scores', shape=[-1, 81], - ... dtype='float32', lod_level=1) - >>> out, index = paddle.incubate.layers.multiclass_nms2(bboxes=boxes, - ... scores=scores, - ... background_label=0, - ... score_threshold=0.5, - ... nms_top_k=400, - ... nms_threshold=0.3, - ... keep_top_k=200, - ... normalized=False, - ... return_index=True) - """ - helper = LayerHelper('multiclass_nms2', **locals()) - - output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) - index = helper.create_variable_for_type_inference(dtype='int') - helper.append_op( - type="multiclass_nms2", - inputs={'BBoxes': bboxes, 'Scores': scores}, - attrs={ - 'background_label': background_label, - 'score_threshold': score_threshold, - 'nms_top_k': nms_top_k, - 'nms_threshold': nms_threshold, - 'keep_top_k': keep_top_k, - 'nms_eta': nms_eta, - 'normalized': normalized, - }, - outputs={'Out': output, 'Index': index}, - ) - output.stop_gradient = True - index.stop_gradient = True - - if return_index: - return output, index - return output - - def search_pyramid_hash( input, num_emb, diff --git a/test/legacy_test/test_detection.py b/test/legacy_test/test_detection.py index e8819c9dc62bf..397b3adba5387 100644 --- a/test/legacy_test/test_detection.py +++ b/test/legacy_test/test_detection.py @@ -159,27 +159,6 @@ def test_generate_proposals(self): np.testing.assert_array_equal(np.array(rois_num_stat), rois_num_dy) -class TestMulticlassNMS2(unittest.TestCase): - def test_multiclass_nms2(self): - program = Program() - with program_guard(program): - bboxes = paddle.static.data( - name='bboxes', shape=[-1, 10, 4], dtype='float32' - ) - scores = paddle.static.data( - name='scores', shape=[-1, 10], dtype='float32' - ) - output = paddle.incubate.layers.multiclass_nms2( - bboxes, scores, 0.3, 400, 200, 0.7 - ) - output2, index = paddle.incubate.layers.multiclass_nms2( - bboxes, scores, 0.3, 400, 200, 0.7, return_index=True - ) - self.assertIsNotNone(output) - self.assertIsNotNone(output2) - self.assertIsNotNone(index) - - class TestDistributeFpnProposals(LayerTest): def static_distribute_fpn_proposals(self, rois_np, rois_num_np): with self.static_graph(): diff --git a/test/legacy_test/test_multiclass_nms_op.py b/test/legacy_test/test_multiclass_nms_op.py index 7262bf88f7a1a..bd2a29a359fa4 100644 --- a/test/legacy_test/test_multiclass_nms_op.py +++ b/test/legacy_test/test_multiclass_nms_op.py @@ -20,7 +20,6 @@ import paddle from paddle import _C_ops -from paddle.base import core from paddle.base.layer_helper import LayerHelper @@ -506,62 +505,6 @@ def test_check_output(self): self.check_output() -class TestMulticlassNMSNoBox(TestMulticlassNMSLoDInput): - def setUp(self): - self.set_argument() - M = 1200 - C = 21 - BOX_SIZE = 4 - box_lod = [[0, 1200, 0]] - background = 0 - nms_threshold = 0.3 - nms_top_k = 400 - keep_top_k = 200 - score_threshold = self.score_threshold - normalized = False - - scores = np.random.random((M, C)).astype('float32') - - scores = np.apply_along_axis(softmax, 1, scores) - - boxes = np.random.random((M, C, BOX_SIZE)).astype('float32') - boxes[:, :, 0] = boxes[:, :, 0] * 10 - boxes[:, :, 1] = boxes[:, :, 1] * 10 - boxes[:, :, 2] = boxes[:, :, 2] * 10 + 10 - boxes[:, :, 3] = boxes[:, :, 3] * 10 + 10 - - det_outs, lod = lod_multiclass_nms( - boxes, - scores, - background, - score_threshold, 
- nms_threshold, - nms_top_k, - keep_top_k, - box_lod, - normalized, - ) - det_outs = np.array(det_outs).astype('float32') - nmsed_outs = ( - det_outs[:, :-1].astype('float32') if len(det_outs) else det_outs - ) - self.op_type = 'multiclass_nms' - self.inputs = { - 'BBoxes': (boxes, box_lod), - 'Scores': (scores, box_lod), - } - self.outputs = {'Out': (nmsed_outs, [lod])} - self.attrs = { - 'background_label': 0, - 'nms_threshold': nms_threshold, - 'nms_top_k': nms_top_k, - 'keep_top_k': keep_top_k, - 'score_threshold': score_threshold, - 'nms_eta': 1.0, - 'normalized': normalized, - } - - class TestIOU(unittest.TestCase): def test_iou(self): box1 = np.array([4.0, 3.0, 7.0, 5.0]).astype('float32') @@ -574,153 +517,13 @@ def test_iou(self): class TestMulticlassNMS2Op(TestMulticlassNMSOp): def setUp(self): - self.set_argument() - N = 7 - M = 1200 - C = 21 - BOX_SIZE = 4 - background = 0 - nms_threshold = 0.3 - nms_top_k = 400 - keep_top_k = 200 - score_threshold = self.score_threshold - - scores = np.random.random((N * M, C)).astype('float32') - - scores = np.apply_along_axis(softmax, 1, scores) - scores = np.reshape(scores, (N, M, C)) - scores = np.transpose(scores, (0, 2, 1)) - - boxes = np.random.random((N, M, BOX_SIZE)).astype('float32') - boxes[:, :, 0:2] = boxes[:, :, 0:2] * 0.5 - boxes[:, :, 2:4] = boxes[:, :, 2:4] * 0.5 + 0.5 - - det_outs, lod = batched_multiclass_nms( - boxes, - scores, - background, - score_threshold, - nms_threshold, - nms_top_k, - keep_top_k, - ) - det_outs = np.array(det_outs) - - nmsed_outs = ( - det_outs[:, :-1].astype('float32') - if len(det_outs) - else np.array([], dtype=np.float32).reshape([0, BOX_SIZE + 2]) - ) - index_outs = ( - det_outs[:, -1:].astype('int') - if len(det_outs) - else np.array([], dtype='int').reshape([0, 1]) - ) - self.op_type = 'multiclass_nms2' - self.inputs = {'BBoxes': boxes, 'Scores': scores} - self.outputs = { - 'Out': (nmsed_outs, [lod]), - 'Index': (index_outs, [lod]), - } - self.attrs = { - 'background_label': 0, - 'nms_threshold': nms_threshold, - 'nms_top_k': nms_top_k, - 'keep_top_k': keep_top_k, - 'score_threshold': score_threshold, - 'nms_eta': 1.0, - 'normalized': True, - } - - def test_check_output(self): - self.check_output() - - -class TestMulticlassNMS2OpNoOutput(TestMulticlassNMS2Op): - def set_argument(self): - # Here set 2.0 to test the case there is no outputs. 
- # In practical use, 0.0 < score_threshold < 1.0 - self.score_threshold = 2.0 - - -class TestMulticlassNMS2LoDInput(TestMulticlassNMSLoDInput): - def setUp(self): - self.set_argument() - M = 1200 - C = 21 - BOX_SIZE = 4 - box_lod = [[1200]] - background = 0 - nms_threshold = 0.3 - nms_top_k = 400 - keep_top_k = 200 - score_threshold = self.score_threshold - normalized = False - - scores = np.random.random((M, C)).astype('float32') - - scores = np.apply_along_axis(softmax, 1, scores) - - boxes = np.random.random((M, C, BOX_SIZE)).astype('float32') - boxes[:, :, 0] = boxes[:, :, 0] * 10 - boxes[:, :, 1] = boxes[:, :, 1] * 10 - boxes[:, :, 2] = boxes[:, :, 2] * 10 + 10 - boxes[:, :, 3] = boxes[:, :, 3] * 10 + 10 - - det_outs, lod = lod_multiclass_nms( - boxes, - scores, - background, - score_threshold, - nms_threshold, - nms_top_k, - keep_top_k, - box_lod, - normalized, - ) - - det_outs = np.array(det_outs) - nmsed_outs = ( - det_outs[:, :-1].astype('float32') - if len(det_outs) - else np.array([], dtype=np.float32).reshape([0, BOX_SIZE + 2]) - ) - index_outs = ( - det_outs[:, -1:].astype('int') - if len(det_outs) - else np.array([], dtype='int').reshape([0, 1]) - ) - self.op_type = 'multiclass_nms2' - self.inputs = { - 'BBoxes': (boxes, box_lod), - 'Scores': (scores, box_lod), - } - self.outputs = { - 'Out': (nmsed_outs, [lod]), - 'Index': (index_outs, [lod]), - } - self.attrs = { - 'background_label': 0, - 'nms_threshold': nms_threshold, - 'nms_top_k': nms_top_k, - 'keep_top_k': keep_top_k, - 'score_threshold': score_threshold, - 'nms_eta': 1.0, - 'normalized': normalized, - } + pass def test_check_output(self): self.check_output() -class TestMulticlassNMS2LoDNoOutput(TestMulticlassNMS2LoDInput): - def set_argument(self): - # Here set 2.0 to test the case there is no outputs. - # In practical use, 0.0 < score_threshold < 1.0 - self.score_threshold = 2.0 - - class TestMulticlassNMS3Op(TestMulticlassNMS2Op): def setUp(self): self.python_api = multiclass_nms3 @@ -794,53 +597,3 @@ def set_argument(self): # Here set 2.0 to test the case there is no outputs. # In practical use, 0.0 < score_threshold < 1.0 self.score_threshold = 2.0 - - -@unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" -) -class TestMulticlassNMS3OpGPU(TestMulticlassNMS2Op): - def test_check_output(self): - place = paddle.CUDAPlace(0) - self.check_output_with_place(place) - - def set_argument(self): - self.score_threshold = 0.01 - self.gpu_logic = True - - -@unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" -) -class TestMulticlassNMS3OpGPULessOutput(TestMulticlassNMS3OpGPU): - def set_argument(self): - # Here set 0.08 to make output box size less than keep_top_k - self.score_threshold = 0.08 - self.gpu_logic = True - - -@unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" -) -class TestMulticlassNMS3OpGPUNoOutput(TestMulticlassNMS3OpGPU): - def set_argument(self): - # Here set 2.0 to test the case there is no outputs. 
- # In practical use, 0.0 < score_threshold < 1.0 - self.score_threshold = 2.0 - self.gpu_logic = True - - -@unittest.skipIf( - not core.is_compiled_with_cuda(), "core is not compiled with CUDA" -) -class TestMulticlassNMS3OpGPUFallback(TestMulticlassNMS3OpGPU): - def set_argument(self): - # Setting keep_top_k < 0 will fall back to CPU kernel - self.score_threshold = 0.01 - self.keep_top_k = -1 - self.gpu_logic = True - - -if __name__ == '__main__': - paddle.enable_static() - unittest.main() From 822f9e3a7006b788be0f27db211b50426667301a Mon Sep 17 00:00:00 2001 From: co63oc Date: Wed, 17 Apr 2024 13:36:31 +0800 Subject: [PATCH 2/2] Fix --- paddle/fluid/inference/api/analysis_predictor.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index d4a73175b3222..71b6603161232 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -3294,7 +3294,6 @@ USE_TRT_CONVERTER(arg_max); USE_TRT_CONVERTER(arg_min); USE_TRT_CONVERTER(roi_align); USE_TRT_CONVERTER(affine_channel); -USE_TRT_CONVERTER(multiclass_nms); USE_TRT_CONVERTER(multiclass_nms3); USE_TRT_CONVERTER(nearest_interp); USE_TRT_CONVERTER(nearest_interp_v2);
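Note on the removed behavior: the operator documentation and the multiclass_nms2 docstring deleted above both describe the same procedure — per-class greedy NMS gated by score_threshold and nms_top_k, an adaptive IoU threshold driven by nms_eta, then a global keep_top_k cut per image. The following is a minimal NumPy sketch of that procedure for reference only; it is not part of either patch, the parameter names simply mirror the op attributes, boxes are assumed to be [xmin, ymin, xmax, ymax], and the maintained in-tree implementation remains the multiclass_nms3 path that these changes keep.

    # Illustrative sketch of the multiclass NMS procedure described in the
    # removed operator's documentation. Not the Paddle implementation.
    import numpy as np

    def iou(box_a, box_b):
        """IoU of two [xmin, ymin, xmax, ymax] boxes."""
        ixmin = max(box_a[0], box_b[0]); iymin = max(box_a[1], box_b[1])
        ixmax = min(box_a[2], box_b[2]); iymax = min(box_a[3], box_b[3])
        inter = max(ixmax - ixmin, 0.0) * max(iymax - iymin, 0.0)
        area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
        area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
        union = area_a + area_b - inter
        return inter / union if union > 0 else 0.0

    def nms_one_class(boxes, scores, score_threshold, nms_top_k,
                      nms_threshold, nms_eta):
        """Greedy NMS for one class with the adaptive (nms_eta) threshold."""
        idx = np.argsort(-scores)                    # highest score first
        idx = idx[scores[idx] > score_threshold]     # drop low-confidence boxes
        if nms_top_k > -1:
            idx = idx[:nms_top_k]                    # pre-NMS top-k cut
        selected, adaptive = [], nms_threshold
        for i in idx:
            if all(iou(boxes[i], boxes[k]) <= adaptive for k in selected):
                selected.append(i)
                if nms_eta < 1.0 and adaptive > 0.5:
                    adaptive *= nms_eta              # adaptive threshold decay
        return selected

    def multiclass_nms(boxes, scores, background_label=0, score_threshold=0.05,
                       nms_top_k=400, nms_threshold=0.3, nms_eta=1.0,
                       keep_top_k=200):
        """boxes: [M, 4]; scores: [C, M]. Returns rows [label, score, x1, y1, x2, y2]."""
        dets = []
        for c in range(scores.shape[0]):
            if c == background_label:
                continue                             # background class is skipped
            for i in nms_one_class(boxes, scores[c], score_threshold,
                                   nms_top_k, nms_threshold, nms_eta):
                dets.append([float(c), float(scores[c, i]), *boxes[i].tolist()])
        dets.sort(key=lambda d: -d[1])               # rank all classes by score
        if keep_top_k > -1:
            dets = dets[:keep_top_k]                 # per-image keep_top_k cut
        return np.array(dets, dtype=np.float32).reshape(-1, 6)

    if __name__ == "__main__":
        rng = np.random.default_rng(0)
        boxes = rng.random((50, 4)).astype("float32")
        boxes[:, 2:] += boxes[:, :2]                 # ensure xmax >= xmin, ymax >= ymin
        scores = rng.random((5, 50)).astype("float32")
        print(multiclass_nms(boxes, scores).shape)   # (No, 6)

Callers migrating off multiclass_nms / multiclass_nms2 should move to the multiclass_nms3 operator exercised by the remaining tests, not to this sketch.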