Skip to content

Commit a081009

Browse files
ckirchhoff2021 authored and root committed
[Feature] Add support for Ascend devices with nms_rotated (open-mmlab#2550)
* [Feature]: add nms_rotated npu adapter code
* [BugFix]: modify param in nms_rotated_npu.cpp
* [clean code]: nms_rotated_npu.cpp
* [clean code]: nms_rotated_npu.cpp
* [clean code]: nms_rotated_npu.cpp
* [clean code]: nms_rotated.cpp
* [Doc]: add nms_rotated op in supported op list at ops.md
* [Test]: add nms_rotated unit_test
* [Bug]: remove device parameter in test_batched_nms function
1 parent 7e8bec1 commit a081009

File tree

7 files changed

+93
-16
lines changed

7 files changed

+93
-16
lines changed

docs/en/understand_mmcv/ops.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ We implement common ops used in detection, segmentation, etc.
3535
| ModulatedDeformConv2d |||| ||
3636
| MultiScaleDeformableAttn | ||| | |
3737
| NMS |||| ||
38-
| NMSRotated ||| | | |
38+
| NMSRotated ||| | | |
3939
| NMSQuadri ||| | | |
4040
| PixelGroup || | | | |
4141
| PointsInBoxes ||| | | |

docs/zh_cn/understand_mmcv/ops.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ MMCV 提供了检测、分割等任务中常用的算子
3535
| ModulatedDeformConv2d |||| ||
3636
| MultiScaleDeformableAttn | ||| | |
3737
| NMS |||| ||
38-
| NMSRotated ||| | | |
38+
| NMSRotated ||| | | |
3939
| NMSQuadri ||| | | |
4040
| PixelGroup || | | | |
4141
| PointsInBoxes ||| | | |

mmcv/ops/csrc/pytorch/nms_rotated.cpp

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,19 +12,30 @@ Tensor nms_rotated_cuda(const Tensor dets, const Tensor scores,
1212
const float iou_threshold, const int multi_label);
1313
#endif
1414

15+
#ifdef MMCV_WITH_NPU
16+
Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
17+
const Tensor labels, const float iou_threshold);
18+
#endif
19+
1520
// Interface for Python
1621
// inline is needed to prevent multiple function definitions when this header is
1722
// included by different cpps
1823
Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order,
19-
const Tensor dets_sorted, const float iou_threshold,
20-
const int multi_label) {
24+
const Tensor dets_sorted, const Tensor labels,
25+
const float iou_threshold, const int multi_label) {
2126
assert(dets.device().is_cuda() == scores.device().is_cuda());
2227
if (dets.device().is_cuda()) {
2328
#ifdef MMCV_WITH_CUDA
2429
return nms_rotated_cuda(dets, scores, order, dets_sorted, iou_threshold,
2530
multi_label);
2631
#else
2732
AT_ERROR("Not compiled with GPU support");
33+
#endif
34+
} else if (dets.device().type() == at::kXLA) {
35+
#ifdef MMCV_WITH_NPU
36+
return nms_rotated_npu(dets, scores, labels, iou_threshold);
37+
#else
38+
AT_ERROR("Not compiled with NPU support");
2839
#endif
2940
}
3041

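nms_rotated_npu.cpp (new file; its path header is missing from this capture — name taken from the commit message)

Lines changed: 32 additions & 0 deletions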
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,32 @@
1+
#include "pytorch_npu_helper.hpp"
2+
3+
using namespace NPU_NAME_SPACE;
4+
5+
Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
6+
const Tensor labels, const float iou_threshold) {
7+
auto originDtype = dets.scalar_type();
8+
at::Tensor detsCast = dets;
9+
at::Tensor scoresCast = scores;
10+
if (originDtype != at::ScalarType::Float) {
11+
detsCast = NPUNativeFunctions::npu_dtype_cast(dets, at::kFloat);
12+
scoresCast = NPUNativeFunctions::npu_dtype_cast(scores, at::kFloat);
13+
}
14+
c10::SmallVector<int64_t, SIZE> selectedIndexSize = {dets.size(0)};
15+
at::Tensor selectedBox = OpPreparation::ApplyTensor(dets);
16+
at::Tensor selectedIndex = OpPreparation::ApplyTensor(
17+
selectedIndexSize, dets.options().dtype(at::kInt), dets);
18+
19+
c10::SmallVector<int64_t, N> output_sync_idx = {0, 1};
20+
OpCommand cmd;
21+
cmd.Sync(output_sync_idx)
22+
.Name("RotatedNMS")
23+
.Input(detsCast)
24+
.Input(scoresCast)
25+
.Input(labels)
26+
.Output(selectedBox)
27+
.Output(selectedIndex)
28+
.Attr("iou_threshold", (float)iou_threshold)
29+
.Run();
30+
selectedIndex = NPUNativeFunctions::npu_dtype_cast(selectedIndex, at::kLong);
31+
return selectedIndex;
32+
}

mmcv/ops/csrc/pytorch/pybind.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -309,8 +309,8 @@ void box_iou_rotated(const Tensor boxes1, const Tensor boxes2, Tensor ious,
309309
const int mode_flag, const bool aligned);
310310

311311
Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order,
312-
const Tensor dets_sorted, const float iou_threshold,
313-
const int multi_label);
312+
const Tensor dets_sorted, const Tensor labels,
313+
const float iou_threshold, const int multi_label);
314314

315315
Tensor upfirdn2d(const Tensor &input, const Tensor &kernel, int up_x, int up_y,
316316
int down_x, int down_y, int pad_x0, int pad_x1, int pad_y0,
@@ -748,7 +748,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
748748
py::arg("mode_flag"), py::arg("aligned"));
749749
m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes", py::arg("dets"),
750750
py::arg("scores"), py::arg("order"), py::arg("dets_sorted"),
751-
py::arg("iou_threshold"), py::arg("multi_label"));
751+
py::arg("labels"), py::arg("iou_threshold"), py::arg("multi_label"));
752752
m.def("ball_query_forward", &ball_query_forward, "ball_query_forward",
753753
py::arg("new_xyz_tensor"), py::arg("xyz_tensor"), py::arg("idx_tensor"),
754754
py::arg("b"), py::arg("n"), py::arg("m"), py::arg("min_radius"),

mmcv/ops/nms.py

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -454,6 +454,19 @@ def nms_rotated(dets: Tensor,
454454
else:
455455
dets_cw = dets
456456
multi_label = labels is not None
457+
if labels is None:
458+
input_labels = scores.new_empty(0, dtype=torch.int)
459+
else:
460+
input_labels = labels
461+
if dets.device.type == 'npu':
462+
order = scores.new_empty(0, dtype=torch.long)
463+
keep_inds = ext_module.nms_rotated(dets_cw, scores, order, dets_cw,
464+
input_labels, iou_threshold,
465+
multi_label)
466+
dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)),
467+
dim=1)
468+
return dets, keep_inds
469+
457470
if multi_label:
458471
dets_wl = torch.cat((dets_cw, labels.unsqueeze(1)), 1) # type: ignore
459472
else:
@@ -467,11 +480,13 @@ def nms_rotated(dets: Tensor,
467480
scores,
468481
order,
469482
dets_sorted,
483+
input_labels,
470484
iou_threshold=iou_threshold,
471485
multi_label=multi_label)
472486
else:
473487
keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted,
474-
iou_threshold, multi_label)
488+
input_labels, iou_threshold,
489+
multi_label)
475490
dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)),
476491
dim=1)
477492
return dets, keep_inds

tests/test_ops/test_nms_rotated.py

Lines changed: 27 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,22 @@
33
import pytest
44
import torch
55

6+
from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE
7+
68

7-
@pytest.mark.skipif(
8-
not torch.cuda.is_available(),
9-
reason='GPU is required to test NMSRotated op')
109
class TestNmsRotated:
1110

12-
def test_ml_nms_rotated(self):
11+
@pytest.mark.parametrize('device', [
12+
pytest.param(
13+
'npu',
14+
marks=pytest.mark.skipif(
15+
not IS_NPU_AVAILABLE, reason='requires NPU support')),
16+
pytest.param(
17+
'cuda',
18+
marks=pytest.mark.skipif(
19+
not IS_CUDA_AVAILABLE, reason='requires CUDA support'))
20+
])
21+
def test_ml_nms_rotated(self, device):
1322
from mmcv.ops import nms_rotated
1423
np_boxes = np.array(
1524
[[6.0, 3.0, 8.0, 7.0, 0.5, 0.7], [3.0, 6.0, 9.0, 11.0, 0.6, 0.8],
@@ -24,8 +33,8 @@ def test_ml_nms_rotated(self):
2433
dtype=np.float32)
2534
np_expect_keep_inds = np.array([3, 1, 0], dtype=np.int64)
2635

27-
boxes = torch.from_numpy(np_boxes).cuda()
28-
labels = torch.from_numpy(np_labels).cuda()
36+
boxes = torch.from_numpy(np_boxes).to(device)
37+
labels = torch.from_numpy(np_labels).to(device)
2938

3039
# test cw angle definition
3140
dets, keep_inds = nms_rotated(boxes[:, :5], boxes[:, -1], 0.5, labels)
@@ -41,7 +50,17 @@ def test_ml_nms_rotated(self):
4150
assert np.allclose(dets.cpu().numpy()[:, :5], np_expect_dets)
4251
assert np.allclose(keep_inds.cpu().numpy(), np_expect_keep_inds)
4352

44-
def test_nms_rotated(self):
53+
@pytest.mark.parametrize('device', [
54+
pytest.param(
55+
'npu',
56+
marks=pytest.mark.skipif(
57+
not IS_NPU_AVAILABLE, reason='requires NPU support')),
58+
pytest.param(
59+
'cuda',
60+
marks=pytest.mark.skipif(
61+
not IS_CUDA_AVAILABLE, reason='requires CUDA support'))
62+
])
63+
def test_nms_rotated(self, device):
4564
from mmcv.ops import nms_rotated
4665
np_boxes = np.array(
4766
[[6.0, 3.0, 8.0, 7.0, 0.5, 0.7], [3.0, 6.0, 9.0, 11.0, 0.6, 0.8],
@@ -55,7 +74,7 @@ def test_nms_rotated(self):
5574
dtype=np.float32)
5675
np_expect_keep_inds = np.array([3, 1, 0], dtype=np.int64)
5776

58-
boxes = torch.from_numpy(np_boxes).cuda()
77+
boxes = torch.from_numpy(np_boxes).to(device)
5978

6079
# test cw angle definition
6180
dets, keep_inds = nms_rotated(boxes[:, :5], boxes[:, -1], 0.5)

0 commit comments

Comments
 (0)