diff --git a/docs/zh_cn/get_started/build.md b/docs/zh_cn/get_started/build.md index 99d2214e90..8c2ad15d64 100644 --- a/docs/zh_cn/get_started/build.md +++ b/docs/zh_cn/get_started/build.md @@ -354,3 +354,51 @@ y = torch.tensor([1, 5, 3]).mlu() w = torch.ones(10).float().mlu() output = sigmoid_focal_loss(x, y, 2.0, 0.25, w, 'none') ``` + +### 在昇腾 NPU 机器编译 mmcv + +在编译 mmcv 前,需要安装 torch_npu,完整安装教程详见 [PyTorch 安装指南](https://gitee.com/ascend/pytorch/blob/master/docs/zh/PyTorch%E5%AE%89%E8%A3%85%E6%8C%87%E5%8D%97/PyTorch%E5%AE%89%E8%A3%85%E6%8C%87%E5%8D%97.md#pytorch%E5%AE%89%E8%A3%85%E6%8C%87%E5%8D%97) + +#### 选项 1: 使用 NPU 设备源码编译安装 mmcv (推荐方式) + +- 拉取 [MMCV 源码](https://github.com/open-mmlab/mmcv.git) + +```bash +git clone https://github.com/open-mmlab/mmcv.git +``` + +- 编译 + +```bash +MMCV_WITH_OPS=1 MAX_JOBS=8 FORCE_NPU=1 python setup.py build_ext +``` + +- 安装 + +```bash +MMCV_WITH_OPS=1 FORCE_NPU=1 python setup.py develop +``` + +#### 选项 2: 使用 pip 安装 Ascend 编译版本的 mmcv + +Ascend 编译版本的 mmcv 在 mmcv >= 1.7.0 时已经支持直接 pip 安装 + +```bash +pip install mmcv -f https://download.openmmlab.com/mmcv/dist/ascend/torch1.8.0/index.html +``` + +#### 验证 + +```python +import torch +import torch_npu +from mmcv.ops import softmax_focal_loss + +# Init tensor to the NPU +x = torch.randn(3, 10).npu() +y = torch.tensor([1, 5, 3]).npu() +w = torch.ones(10).float().npu() + +output = softmax_focal_loss(x, y, 2.0, 0.25, w, 'none') +print(output) +``` diff --git a/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp b/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp index f428311fee..75f39caaa8 100644 --- a/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp +++ b/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp @@ -11,19 +11,37 @@ void roi_pool_forward_npu(Tensor input, Tensor rois, Tensor output, int64_t pooled_channel = 1; at::Tensor roi_actual_num = at_npu::native::OpPreparation::ApplyTensor( {}, rois.options().dtype(at::kInt), rois); - OpCommand cmd; - cmd.Name("RoiPoolingWithArgMax") - .Input(input) - .Input(rois) - 
.Input(roi_actual_num) - .Output(output) - .Output(argmax) - .Attr("pooled_h", pooled_height_64) - .Attr("pooled_w", pooled_width_64) - .Attr("spatial_scale_h", spatial_scale) - .Attr("spatial_scale_w", spatial_scale) - .Attr("pool_channel", pooled_channel) - .Run(); + if (input.sizes()[1] % 16 == 0) { + OpCommand cmd; + cmd.Name("RoiPoolingWithArgMax") + .Input(input) + .Input(rois) + .Input(roi_actual_num) + .Output(output) + .Output(argmax) + .Attr("pooled_h", pooled_height_64) + .Attr("pooled_w", pooled_width_64) + .Attr("spatial_scale_h", spatial_scale) + .Attr("spatial_scale_w", spatial_scale) + .Attr("pool_channel", pooled_channel) + .Run(); + + } else { + OpCommand cmd; + cmd.Name("RoiPoolingWithArgMax") + .Input(input) + .Input(rois) + .Input(roi_actual_num) + .Output(output) + .Output(argmax) + .Attr("pooled_h", pooled_height_64) + .Attr("pooled_w", pooled_width_64) + .Attr("spatial_scale_h", spatial_scale) + .Attr("spatial_scale_w", spatial_scale) + .Attr("pool_channel", pooled_channel) + .Attr("_exclude_engines", (string) "AiCore") + .Run(); + } } void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax,