Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Support ball_query with Cambricon MLU backend and mlu-ops library. #2520

Merged
merged 13 commits into from
Jan 18, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions mmcv/ops/csrc/pytorch/mlu/ball_query_mlu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*************************************************************************
* Copyright (C) 2022 Cambricon.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#include "mlu_common_desc.h"
#include "mlu_op.h"
#include "pytorch_device_registry.hpp"
#include "pytorch_mlu_helper.hpp"

// MLU implementation of ball query: for each point in new_xyz, gather up to
// `nsample` neighbor indices from xyz whose distance lies in
// [min_radius, max_radius), writing the result into idx via mluOpBallQuery.
// NOTE(review): b/n/m are not referenced here — shapes are taken from the
// tensor descriptors; the parameters are kept to match the dispatch
// signature.
void ball_query_forward_mlu(int b, int n, int m, float min_radius,
                            float max_radius, int nsample, const Tensor new_xyz,
                            const Tensor xyz, Tensor idx) {
  // Describe dtype/layout/dims of each tensor for the mlu-ops kernel.
  MluOpTensorDescriptor new_xyz_desc, xyz_desc, idx_desc;
  new_xyz_desc.set(new_xyz);
  xyz_desc.set(xyz);
  idx_desc.set(idx);

  // mlu-ops kernels expect contiguous memory; these are no-ops when the
  // tensors are already contiguous in new_xyz's suggested format.
  auto new_xyz_contiguous = torch_mlu::cnnl::ops::cnnl_contiguous(
      new_xyz, new_xyz.suggest_memory_format());
  auto xyz_contiguous = torch_mlu::cnnl::ops::cnnl_contiguous(
      xyz, new_xyz.suggest_memory_format());
  // NOTE(review): if idx is non-contiguous, cnnl_contiguous presumably
  // returns a copy and the kernel's output would not reach the caller's idx
  // tensor — confirm callers always pass a contiguous idx.
  auto idx_contiguous = torch_mlu::cnnl::ops::cnnl_contiguous(
      idx, new_xyz.suggest_memory_format());

  // Raw MLU device pointers for the kernel launch.
  auto new_xyz_impl = torch_mlu::getMluTensorImpl(new_xyz_contiguous);
  auto xyz_impl = torch_mlu::getMluTensorImpl(xyz_contiguous);
  auto idx_impl = torch_mlu::getMluTensorImpl(idx_contiguous);
  auto new_xyz_ptr = new_xyz_impl->cnnlMalloc();
  auto xyz_ptr = xyz_impl->cnnlMalloc();
  auto idx_ptr = idx_impl->cnnlMalloc();

  // Launch on the current device's mlu-ops handle (bound to the current
  // queue inside mluOpGetCurrentHandle).
  auto handle = mluOpGetCurrentHandle();
  mluOpBallQuery(handle, new_xyz_desc.desc(), new_xyz_ptr, xyz_desc.desc(),
                 xyz_ptr, min_radius, max_radius, nsample, idx_desc.desc(),
                 idx_ptr);
}

// Forward declaration of the device-dispatch entry point (defined in the
// common ball_query source); the MLU kernel above is registered against it.
void ball_query_forward_impl(int b, int n, int m, float min_radius,
                             float max_radius, int nsample,
                             const Tensor new_xyz, const Tensor xyz,
                             Tensor idx);

// Register ball_query_forward_mlu as the MLU backend implementation of
// ball_query_forward_impl in the device registry.
REGISTER_DEVICE_IMPL(ball_query_forward_impl, MLU, ball_query_forward_mlu);
103 changes: 103 additions & 0 deletions mmcv/ops/csrc/pytorch/mlu/mlu_common_desc.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*************************************************************************
* Copyright (C) 2022 Cambricon.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#include "mlu_common_desc.h"

#include "mlu_op.h"

// Descriptors
// Map an ATen/caffe2 scalar type (identified by its runtime type name) to
// the corresponding mlu-ops dtype enum. Returns MLUOP_DTYPE_INVALID for any
// type without an mlu-ops equivalent.
mluOpDataType_t getMluOpDataType(const caffe2::TypeMeta& data_type) {
  // Built once (thread-safe magic static) instead of on every call.
  static const std::map<std::string, mluOpDataType_t> mapping_type = {
      {std::string("c10::Half"), MLUOP_DTYPE_HALF},
      {std::string("float"), MLUOP_DTYPE_FLOAT},
      {std::string("double"), MLUOP_DTYPE_DOUBLE},
      {std::string("int8"), MLUOP_DTYPE_INT8},
      {std::string("signed char"), MLUOP_DTYPE_INT8},
      {std::string("short int"), MLUOP_DTYPE_INT16},
      {std::string("short"), MLUOP_DTYPE_INT16},
      {std::string("int"), MLUOP_DTYPE_INT32},
      {std::string("long int"), MLUOP_DTYPE_INT64},
      {std::string("long"), MLUOP_DTYPE_INT64},
      {std::string("unsigned char"), MLUOP_DTYPE_UINT8},
      {std::string("bool"), MLUOP_DTYPE_BOOL},
      {std::string("c10::complex<c10::Half>"), MLUOP_DTYPE_COMPLEX_HALF},
      {std::string("c10::complex<float>"), MLUOP_DTYPE_COMPLEX_FLOAT}};

  // Single lookup (the original called find() twice).
  const auto it = mapping_type.find(std::string(data_type.name()));
  if (it != mapping_type.end()) {
    return it->second;
  }
  return MLUOP_DTYPE_INVALID;
}

// layout
// Choose the mlu-ops tensor layout matching the tensor's suggested memory
// format: 4-D tensors map to NHWC/NCHW, 5-D to NDHWC/NCDHW, and any other
// rank falls back to the generic ARRAY layout.
mluOpTensorLayout_t getMluOpSuggestLayout(const at::Tensor& input) {
  const auto memory_format = input.suggest_memory_format();
  const auto rank = input.dim();
  if (rank == 4) {
    return memory_format == at::MemoryFormat::ChannelsLast
               ? MLUOP_LAYOUT_NHWC
               : MLUOP_LAYOUT_NCHW;
  }
  if (rank == 5) {
    return memory_format == at::MemoryFormat::ChannelsLast3d
               ? MLUOP_LAYOUT_NDHWC
               : MLUOP_LAYOUT_NCDHW;
  }
  return MLUOP_LAYOUT_ARRAY;
}

// Configure this descriptor from a tensor: infer the mlu-ops dtype and
// layout, build the dimension array, and forward to set_desc().
void MluOpTensorDescriptor::set(Tensor t) {
  mluOpDataType_t data_type = getMluOpDataType(t.dtype());
  mluOpTensorLayout_t layout = getMluOpSuggestLayout(t);
  std::vector<int> dim_array;
  if (t.dim() == 0) {
    // A 0-dim scalar tensor is described as a 1-element 1-D tensor.
    dim_array.push_back(1);
  } else {
    dim_array.reserve(t.dim());
    // Iterate sizes() directly; the original t.sizes().vec()[i] copied the
    // entire size vector on every loop iteration.
    for (const auto size : t.sizes()) {
      dim_array.push_back(static_cast<int>(size));
    }
  }
  set_desc(t, layout, data_type, dim_array);
}

// Thin wrapper over mluOpSetTensorDescriptor applying layout/dtype/dims to
// the owned descriptor.
// NOTE(review): the tensor argument `t` is unused here — presumably kept for
// interface symmetry or future use; confirm. The mluOp status return is not
// checked, so descriptor errors would only surface at kernel launch.
void MluOpTensorDescriptor::set_desc(const at::Tensor& t,
                                     mluOpTensorLayout_t layout,
                                     mluOpDataType_t dtype,
                                     std::vector<int>& dims) {
  int dimNb = dims.size();
  mluOpSetTensorDescriptor(desc_, layout, dtype, dimNb, dims.data());
}

// Handles
// One MluOpHandle per MLU device, created lazily on first use and rebound to
// the caller's current queue on every lookup.
std::once_flag mmcv_mluop_init_flag;
std::mutex mmcv_mluop_mutex;
static std::vector<MluOpHandle> mmcv_mluop_handles;

// Return the mluOp handle for `device_index` (the current device when -1),
// after binding it to that device's current MLU queue.
mluOpHandle_t mluOpGetCurrentHandle(c10::DeviceIndex device_index) {
  std::call_once(mmcv_mluop_init_flag,
                 []() // Init mmcv_mluop_handles 1-device <-> 1-handle
                 {
                   c10::DeviceIndex num_devices = torch_mlu::device_count();
                   mmcv_mluop_handles.resize(num_devices);
                 });

  if (device_index == -1) {
    device_index = torch_mlu::current_device();
  }
  // Serialize setQueue + handle access across threads.
  std::lock_guard<std::mutex> mmcv_mluop_guard(mmcv_mluop_mutex);
  auto queue = torch_mlu::getCurrentQueue(device_index).queue();
  mmcv_mluop_handles[device_index].setQueue(queue);
  return mmcv_mluop_handles[device_index].handle;
}
50 changes: 50 additions & 0 deletions mmcv/ops/csrc/pytorch/mlu/mlu_common_desc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*************************************************************************
* Copyright (C) 2022 Cambricon.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#pragma once
#include <ATen/ATen.h>
#include <c10/core/ScalarType.h>

#include "mlu_op.h"
#include "pytorch_device_registry.hpp"
#include "pytorch_mlu_helper.hpp"

mluOpDataType_t getMluOpDataType(const caffe2::TypeMeta& data_type);
mluOpTensorLayout_t getMluOpSuggestLayout(const at::Tensor& input);

// RAII owner of an mluOpTensorDescriptor_t.
// Non-copyable: copying would leave two wrappers destroying the same
// underlying descriptor (double free in the destructor).
class MluOpTensorDescriptor {
 public:
  MluOpTensorDescriptor() { mluOpCreateTensorDescriptor(&desc_); }
  ~MluOpTensorDescriptor() { mluOpDestroyTensorDescriptor(desc_); }
  MluOpTensorDescriptor(const MluOpTensorDescriptor&) = delete;
  MluOpTensorDescriptor& operator=(const MluOpTensorDescriptor&) = delete;

  // Fill the descriptor (dtype/layout/dims) from a tensor.
  void set(at::Tensor);
  // Raw descriptor handle for passing to mlu-ops kernels.
  mluOpTensorDescriptor_t desc() { return desc_; }

 private:
  mluOpTensorDescriptor_t desc_;
  void set_desc(const at::Tensor&, mluOpTensorLayout_t, mluOpDataType_t,
                std::vector<int>& dims);
};

mluOpHandle_t mluOpGetCurrentHandle(c10::DeviceIndex device_index = -1);

class MluOpHandle {
public:
MluOpHandle() : handle(nullptr) { mluOpCreate(&handle); }
~MluOpHandle() {
if (handle) {
mluOpDestroy(handle);
handle = nullptr;
}
}
void setQueue(cnrtQueue_t queue) { mluOpSetQueue(handle, queue); }
mluOpHandle_t handle;
};
44 changes: 42 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ def get_extensions():

include_dirs = []

extra_objects = []
is_rocm_pytorch = False
try:
from torch.utils.cpp_extension import ROCM_HOME
Expand Down Expand Up @@ -300,16 +301,54 @@ def get_extensions():
torch.is_mlu_available()) or \
os.getenv('FORCE_MLU', '0') == '1':
from torch_mlu.utils.cpp_extension import MLUExtension
if not os.path.exists('./mlu-ops'):
import requests
mluops_url = 'https://github.com/Cambricon/'\
'mlu-ops/releases/latest'
response = requests.get(url=mluops_url, allow_redirects=False)
download_url = response.headers.get('Location').replace(
'releases/tag', 'archive/refs/tags') + '.zip'
req = requests.get(download_url)
with open('./mlu-ops-latest.zip', 'wb') as f:
try:
f.write(req.content)
except Exception:
raise ImportError('downlowad mlu-ops fail')
grimoire marked this conversation as resolved.
Show resolved Hide resolved

from zipfile import BadZipFile, ZipFile
with ZipFile('./mlu-ops-latest.zip', 'r') as archive:
try:
archive.extractall()
dir_name = archive.namelist()[0].split('/')[0]
os.rename(dir_name, 'mlu-ops')
except BadZipFile:
print('invalid mlu-ops-latest.zip file')

define_macros += [('MMCV_WITH_MLU', None)]
mlu_args = os.getenv('MMCV_MLU_ARGS')
extra_compile_args['cncc'] = [mlu_args] if mlu_args else []
mluops_includes = []
mluops_includes.append('-I' +
os.path.abspath('./mlu-ops/bangc-ops'))
mluops_includes.append(
'-I' + os.path.abspath('./mlu-ops/bangc-ops/kernels'))
extra_compile_args['cncc'] = [mlu_args] + \
mluops_includes if mlu_args else mluops_includes
DanieeelLiu marked this conversation as resolved.
Show resolved Hide resolved
op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/mlu/*.cpp') + \
glob.glob('./mmcv/ops/csrc/common/mlu/*.mlu')
glob.glob('./mmcv/ops/csrc/common/mlu/*.mlu') + \
glob.glob(
'./mlu-ops/bangc-ops/core/**/*.cpp', recursive=True) + \
glob.glob(
'./mlu-ops/bangc-ops/kernels/**/*.cpp', recursive=True) + \
glob.glob(
'./mlu-ops/bangc-ops/kernels/**/*.mlu', recursive=True)
extra_objects = glob.glob(
'./mlu-ops/bangc-ops/kernels/*/x86_64/*.o')
extension = MLUExtension
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/mlu'))
include_dirs.append(os.path.abspath('./mlu-ops/bangc-ops'))
elif (hasattr(torch.backends, 'mps')
and torch.backends.mps.is_available()) or os.getenv(
'FORCE_MPS', '0') == '1':
Expand Down Expand Up @@ -371,6 +410,7 @@ def get_extensions():
sources=op_files,
include_dirs=include_dirs,
define_macros=define_macros,
extra_objects=extra_objects,
extra_compile_args=extra_compile_args)
extensions.append(ext_ops)

Expand Down
85 changes: 45 additions & 40 deletions tests/test_ops/test_ball_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,55 +3,60 @@
import torch

from mmcv.ops import ball_query
from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE


@pytest.mark.parametrize('device', [
    pytest.param(
        'cuda',
        marks=pytest.mark.skipif(
            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
    pytest.param(
        'mlu',
        marks=pytest.mark.skipif(
            not IS_MLU_AVAILABLE, reason='requires MLU support'))
])
def test_ball_query(device):
    """Check ball_query against precomputed indices on each device.

    Runs both the plain (min_radius=0) and dilated (min_radius>0) variants.
    """
    new_xyz = torch.tensor(
        [[[-0.0740, 1.3147, -1.3625], [-2.2769, 2.7817, -0.2334],
          [-0.4003, 2.4666, -0.5116], [-0.0740, 1.3147, -1.3625],
          [-0.0740, 1.3147, -1.3625]],
         [[-2.0289, 2.4952, -0.1708], [-2.0668, 6.0278, -0.4875],
          [0.4066, 1.4211, -0.2947], [-2.0289, 2.4952, -0.1708],
          [-2.0289, 2.4952, -0.1708]]],
        device=device)

    xyz = torch.tensor(
        [[[-0.0740, 1.3147, -1.3625], [0.5555, 1.0399, -1.3634],
          [-0.4003, 2.4666, -0.5116], [-0.5251, 2.4379, -0.8466],
          [-0.9691, 1.1418, -1.3733], [-0.2232, 0.9561, -1.3626],
          [-2.2769, 2.7817, -0.2334], [-0.2822, 1.3192, -1.3645],
          [0.1533, 1.5024, -1.0432], [0.4917, 1.1529, -1.3496]],
         [[-2.0289, 2.4952, -0.1708], [-0.7188, 0.9956, -0.5096],
          [-2.0668, 6.0278, -0.4875], [-1.9304, 3.3092, 0.6610],
          [0.0949, 1.4332, 0.3140], [-1.2879, 2.0008, -0.7791],
          [-0.7252, 0.9611, -0.6371], [0.4066, 1.4211, -0.2947],
          [0.3220, 1.4447, 0.3548], [-0.9744, 2.3856, -1.2000]]],
        device=device)

    # Plain ball query: all neighbors within max radius 0.2.
    idx = ball_query(0, 0.2, 5, xyz, new_xyz)
    expected_idx = torch.tensor(
        [[[0, 0, 0, 0, 0], [6, 6, 6, 6, 6], [2, 2, 2, 2, 2], [0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0]],
         [[0, 0, 0, 0, 0], [2, 2, 2, 2, 2], [7, 7, 7, 7, 7], [0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0]]],
        device=device)
    assert torch.all(idx == expected_idx)

    # Dilated ball query: neighbors in the annulus [0.2, 0.4).
    idx = ball_query(0.2, 0.4, 5, xyz, new_xyz)
    expected_idx = torch.tensor(
        [[[0, 5, 7, 0, 0], [6, 6, 6, 6, 6], [2, 3, 2, 2, 2], [0, 5, 7, 0, 0],
          [0, 5, 7, 0, 0]],
         [[0, 0, 0, 0, 0], [2, 2, 2, 2, 2], [7, 7, 7, 7, 7], [0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0]]],
        device=device)
    assert torch.all(idx == expected_idx)


Expand Down