diff --git a/backend_ops/ncnn/onnx2ncnn/onnx2ncnn.cpp b/backend_ops/ncnn/onnx2ncnn/onnx2ncnn.cpp index 7aee2551d..e6ef8b500 100644 --- a/backend_ops/ncnn/onnx2ncnn/onnx2ncnn.cpp +++ b/backend_ops/ncnn/onnx2ncnn/onnx2ncnn.cpp @@ -3736,6 +3736,7 @@ int main(int argc, char** argv) { float value = 0.f; value = get_node_attr_f(node, "value", 0.f); fprintf(pp, " 0=%f", value); + } else if (op == "Conv") { const onnx::TensorProto& W = weights[node.input(1)]; @@ -3989,6 +3990,9 @@ int main(int argc, char** argv) { int op_type = 2; fprintf(pp, " 0=%d", op_type); } else if (op == "Gather") { + if (weights[node.input(1)].dims_size() > 1) { + fprintf(stderr, "Unsupported indice dims > 1"); + } int axis = get_node_attr_i(node, "axis", 1) - 1; if (axis < 0) { fprintf(stderr, "Unsupported Gather axis: %d\n", axis + 1); diff --git a/backend_ops/ncnn/ops/gather/gather.cpp b/backend_ops/ncnn/ops/gather/gather.cpp old mode 100755 new mode 100644 index abf4f34c9..3eca16b39 --- a/backend_ops/ncnn/ops/gather/gather.cpp +++ b/backend_ops/ncnn/ops/gather/gather.cpp @@ -1,6 +1,7 @@ #include "gather.h" #include "../ncnn_ops_definer.h" +#include "assert.h" namespace mmlab { using namespace ncnn; @@ -17,6 +18,11 @@ int Gather::load_param(const ParamDict &pd) { return 0; } +// Gather only support 1-dim of indices, because the data and indices all has +// implicit batch in ncnn, this will lead to wrong shape to match onnx result. +// When indices dim equals to 1, after eliminating implicit batch, the indices +// dim still be 1. So there is only 1 implicit batch in data, this will make +// the shape match onnx result. int Gather::forward(const std::vector &bottom_blobs, std::vector &top_blobs, const Option &opt) const { const Mat &bottom_blob = bottom_blobs[0]; @@ -26,7 +32,7 @@ int Gather::forward(const std::vector &bottom_blobs, size_t elemsize = bottom_blob.elemsize; int positive_axis = axis < 0 ? dims + axis : axis; Mat &top_blob = top_blobs[0]; - + assert(indices.dims == 1); const float *indices_ptr = indices; if (dims == 1 && indices_dims == 1) // positive_axis == 0 @@ -46,49 +52,6 @@ int Gather::forward(const std::vector &bottom_blobs, return 0; } - if (dims == 1 && indices_dims == 2) // positive_axis == 0 - { - int w = indices.w; - int h = indices.h; - top_blob.create(w, h, elemsize, opt.blob_allocator); - if (top_blob.empty()) { - return -100; - } - const float *ptr = bottom_blob; - float *outptr = top_blob; - for (int j = 0; j < h; j++) { - for (int i = 0; i < w; i++) { - int indice = (int)(indices_ptr[j * w + i] + 0.5); - outptr[j * w + i] = ptr[indice]; - } - } - return 0; - } - if (dims == 1 && indices_dims == 3) // positive_axis == 0 - { - int c = indices.c; - int w = indices.w; - int h = indices.h; - top_blob.create(c, w, h, elemsize, opt.blob_allocator); - if (top_blob.empty()) { - return -100; - } - const float *ptr = bottom_blob; - - for (int page = 0; page < c; page++) { - indices_ptr = indices.channel(page); - float *outptr = top_blob.channel(page); - for (int j = 0; j < h; j++) { - for (int i = 0; i < w; i++) { - int indice = (int)(indices_ptr[j * w + i] + 0.5); - outptr[j * w + i] = ptr[indice]; - } - } - } - - return 0; - } - if (dims == 2 && positive_axis == 0 && indices_dims == 1) { int w = bottom_blob.w; int h = bottom_blob.h; @@ -130,51 +93,6 @@ int Gather::forward(const std::vector &bottom_blobs, return 0; } - if (dims == 2 && positive_axis == 0 && indices_dims == 2) { - int w = bottom_blob.w; - int h = bottom_blob.h; - top_blob.create(w, indices.w, indices.h, elemsize, opt.blob_allocator); - - if (top_blob.empty()) { - return -100; - } - const float *ptr = bottom_blob; - - for (int k = 0; k < indices.h; k++) { - float *outptr = top_blob.channel(k); - for (int i = 0; i < indices.w; i++) { - for (int j = 0; j < w; j++) { - int selected = (float)(indices_ptr[k * indices.w + i] + 0.5); - outptr[i * w + j] = ptr[selected * w + j]; - } - } - } - - return 0; - } - - if (dims == 2 && positive_axis == 1 && indices_dims == 2) { - int w = bottom_blob.w; - int h = bottom_blob.h; - top_blob.create(h, indices.w, indices.h, elemsize, opt.blob_allocator); - - if (top_blob.empty()) { - return -100; - } - const float *ptr = bottom_blob; - for (int k = 0; k < indices.h; k++) { - float *outptr = top_blob.channel(k); - for (int i = 0; i < indices.w; i++) { - for (int j = 0; j < h; j++) { - int selected = (int)(indices_ptr[k * indices.w + i] + 0.5); - outptr[i * h + j] = ptr[j * w + selected]; - } - } - } - - return 0; - } - if (dims == 3 && positive_axis == 0 && indices_dims == 1) { int w = bottom_blob.w; int h = bottom_blob.h; @@ -198,14 +116,14 @@ int Gather::forward(const std::vector &bottom_blobs, int w = bottom_blob.w; int h = bottom_blob.h; int channels = bottom_blob.c; - top_blob.create(w, channels, indices.w, elemsize, opt.blob_allocator); + top_blob.create(w, indices.w, channels, elemsize, opt.blob_allocator); #pragma omp parallel for num_threads(opt.num_threads) // use parallel programming - for (int i = 0; i < indices.w; i++) { - int selected = (int)(indices_ptr[i] + 0.5); + for (int i = 0; i < channels; i++) { float *outptr = top_blob.channel(i); - for (int j = 0; j < channels; j++) { - const float *ptr = bottom_blob.channel(j); + const float *ptr = bottom_blob.channel(i); + for (int j = 0; j < indices.w; j++) { + int selected = (int)(indices_ptr[j] + 0.5); for (int k = 0; k < w; k++) { outptr[j * w + k] = ptr[selected * w + k]; } @@ -216,25 +134,22 @@ int Gather::forward(const std::vector &bottom_blobs, } if (dims == 3 && positive_axis == 2 && indices_dims == 1) { - fprintf(stderr, "gather: dim = 3\n"); int w = bottom_blob.w; int h = bottom_blob.h; int channels = bottom_blob.c; - top_blob.create(h, channels, indices.w, elemsize, opt.blob_allocator); + top_blob.create(indices.w, h, channels, elemsize, opt.blob_allocator); #pragma omp parallel for num_threads(opt.num_threads) // use parallel programming - for (int i = 0; i < indices.w; i++) { - int selected = (int)(indices_ptr[i] + 0.5); + for (int i = 0; i < channels; i++) { float *outptr = top_blob.channel(i); - for (int j = 0; j < channels; j++) { - const float *ptr = bottom_blob.channel(j); - for (int k = 0; k < h; k++) { - outptr[j * h + k] = ptr[k * w + selected]; + const float *ptr = bottom_blob.channel(i); + for (int j = 0; j < h; j++) { + for (int k = 0; k < indices.w; k++) { + int selected = (int)(indices_ptr[k] + 0.5); + outptr[j * indices.w + k] = ptr[j * w + selected]; } } } - fprintf(stderr, "top_blob.size: (%d %d %d)\n", top_blob.c, top_blob.h, - top_blob.w); return 0; } diff --git a/tests/test_ops/test_ops.py b/tests/test_ops/test_ops.py index 6118be538..2b61d46e9 100644 --- a/tests/test_ops/test_ops.py +++ b/tests/test_ops/test_ops.py @@ -547,6 +547,70 @@ def test_constantofshape(backend, assert_allclose(model_outputs, ncnn_outputs, tolerate_small_mismatch) +@pytest.mark.parametrize('backend', [TEST_NCNN]) +@pytest.mark.parametrize('axis, data_dims, indice_dims', [(0, 1, 1), (0, 2, 1), + (1, 2, 1), (0, 3, 1), + (1, 3, 1), + (2, 3, 1)]) +def test_gather(backend, + axis, + data_dims, + indice_dims, + input_names=['input', 'indices'], + output_names=['output'], + tolerate_small_mismatch=False, + input_list=None, + save_dir=None): + backend.check_env() + + if input_list is None: + # the real data dims is data_dims + 1 + data = torch.rand((8, 12, 17)[-data_dims:]).unsqueeze(0) + indice = torch.randint(0, 8, (3, 4, 5)[-indice_dims:]).unsqueeze(0) + else: + data = input_list[0] + indice = input_list[1] + assert data.shape[0] == 1, (f'ncnn batch must be 1, \ + but got {data.shape[0]}') + assert indice.shape[0] == 1, (f'ncnn batch must be 1, \ + but got {indice.shape[0]}') + cfg = dict() + register_extra_symbolics(cfg=cfg, backend=backend.backend_name, opset=11) + + gather_node = make_node('Gather', input_names, output_names, axis=axis + 1) + gather_graph = make_graph([gather_node], 'gather_graph', [ + make_tensor_value_info(input_names[0], onnx.TensorProto.FLOAT, None), + make_tensor_value_info(input_names[1], onnx.TensorProto.INT64, None) + ], [make_tensor_value_info(output_names[0], onnx.TensorProto.FLOAT, None)]) + gather_model = make_model(gather_graph) + + ncnn_model = backend.onnx2ncnn(gather_model, 'gather', output_names, + save_dir) + + # ncnn mat has implicit batch for mat, the ncnn_output is a mat, + # so the ncnn_outputs has 2 dimensions, not 1. + import onnxruntime + import importlib + assert importlib.util.find_spec('onnxruntime') is not None, 'onnxruntime \ + not installed.' + + import numpy as np + session = onnxruntime.InferenceSession(gather_model.SerializeToString()) + model_outputs = session.run( + output_names, + dict( + zip(input_names, [ + np.array(data, dtype=np.float32), + np.array(indice[0], dtype=np.int64) + ]))) + model_outputs = [model_output for model_output in model_outputs] + + ncnn_outputs = ncnn_model( + dict(zip(input_names, [data.float(), indice.float()]))) + ncnn_outputs = [ncnn_outputs[name] for name in output_names] + assert_allclose(model_outputs, ncnn_outputs, tolerate_small_mismatch) + + @pytest.mark.parametrize('backend', [TEST_NCNN]) @pytest.mark.parametrize('dim', [1, 2, 3]) def test_tensorslice(backend, dim, input_list=None, save_dir=None): diff --git a/tests/test_ops/utils.py b/tests/test_ops/utils.py index d9b50f82f..fad5f3572 100644 --- a/tests/test_ops/utils.py +++ b/tests/test_ops/utils.py @@ -182,7 +182,7 @@ def run_and_validate(self, output_names=None, input_names=None, save_dir=None): - if not save_dir: + if save_dir is None: onnx_file_path = tempfile.NamedTemporaryFile().name ncnn_param_path = tempfile.NamedTemporaryFile().name ncnn_bin_path = tempfile.NamedTemporaryFile().name @@ -233,3 +233,25 @@ def run_and_validate(self, else: assert_allclose(model_outputs, ncnn_outputs, tolerate_small_mismatch) + + def onnx2ncnn(self, model, model_name, output_names, save_dir=None): + if save_dir is None: + onnx_file_path = tempfile.NamedTemporaryFile(suffix='.onnx').name + ncnn_param_path = tempfile.NamedTemporaryFile(suffix='.param').name + ncnn_bin_path = tempfile.NamedTemporaryFile(suffix='.bin').name + else: + onnx_file_path = os.path.join(save_dir, model_name + '.onnx') + ncnn_param_path = os.path.join(save_dir, model_name + '.param') + ncnn_bin_path = os.path.join(save_dir, model_name + '.bin') + + onnx.save_model(model, onnx_file_path) + + import mmdeploy.apis.ncnn as ncnn_apis + onnx2ncnn_path = ncnn_apis.get_onnx2ncnn_path() + subprocess.call( + [onnx2ncnn_path, onnx_file_path, ncnn_param_path, ncnn_bin_path]) + + ncnn_model = ncnn_apis.NCNNWrapper(ncnn_param_path, ncnn_bin_path, + output_names) + + return ncnn_model