From 1eaf0b519839ad14cac1325f64be63e3005b2cce Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Wed, 31 Mar 2021 10:23:36 +0800 Subject: [PATCH 01/15] add onnxruntime custom op grid_sample --- mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp | 307 ++++++++++++++++++ .../onnxruntime/cpu/onnxruntime_register.cpp | 7 + mmcv/ops/csrc/onnxruntime/grid_sample.h | 43 +++ tests/test_ops/test_onnx.py | 38 +++ 4 files changed, 395 insertions(+) create mode 100644 mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp create mode 100644 mmcv/ops/csrc/onnxruntime/grid_sample.h diff --git a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp new file mode 100644 index 00000000000..fe30f2a49a5 --- /dev/null +++ b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp @@ -0,0 +1,307 @@ +#include "grid_sample.h" + +#include "../ort_mmcv_utils.h" + +#include + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)<(b))?(b):(a)) +#define CLIP_COORDINATES(in, out, clip_limit) out=MIN((clip_limit-1), MAX(in,0)) + +GridSampleKernel::GridSampleKernel(OrtApi api, const OrtKernelInfo* info) + : api_(api), ort_(api_), info_(info) { + align_corners_ = ort_.KernelInfoGetAttribute(info, "align_corners"); + interpolation_mode_ = ort_.KernelInfoGetAttribute(info, "interpolation_mode"); + padding_mode_ = ort_.KernelInfoGetAttribute(info, "padding_mode"); + + allocator_ = Ort::AllocatorWithDefaultOptions(); +} + +enum GridSamplerInterpolation { + Bilinear = 0, + Nearest = 1, + Bicubic = 2}; +enum GridSamplerPadding { + Zeros = 0, + Border = 1, + Reflection = 2}; + +template +static inline scalar_t grid_sampler_unnormalize(scalar_t coord, int64_t size, bool align_corners) { + if (align_corners) { + return ((coord + 1) / 2) * (size - 1); + } else { + return ((coord + 1) * size - 1) / 2; + } +} + +// Clips coordinates to between 0 and clip_limit - 1 +template +static inline scalar_t clip_coordinates(scalar_t in, int64_t clip_limit) { + return std::min(static_cast(clip_limit - 1), std::max(in, static_cast(0))); +} + +// Reflects coordinates until they fall between low and high (inclusive). +// The bounds are passed as twice their value so that half-integer values +// can be represented as ints. +template +static inline scalar_t reflect_coordinates(scalar_t in, int64_t twice_low, + int64_t twice_high) { + if (twice_low == twice_high) { + return static_cast(0); + } + scalar_t min = static_cast(twice_low) / 2; + scalar_t span = static_cast(twice_high - twice_low) / 2; + in = std::fabs(in - min); + // `fmod` returns same sign as `in`, which is positive after the `fabs` above. + scalar_t extra = std::fmod(in, span); + int flips = static_cast(std::floor(in / span)); + if (flips % 2 == 0) { + return extra + min; + } else { + return span - extra + min; + } +} + +template +static inline scalar_t compute_coordinates(scalar_t coord, int64_t size, + int64_t padding_mode, + bool align_corners) { + if (padding_mode == GridSamplerPadding::Border) { + coord = clip_coordinates(coord, size); + } else if (padding_mode == GridSamplerPadding::Reflection) { + if (align_corners) { + coord = reflect_coordinates(coord, 0, 2*(size - 1)); + } else { + coord = reflect_coordinates(coord, -1, 2 * size - 1); + } + coord = clip_coordinates(coord, size); + } + return coord; +} + +// Computes the pixel source index value for a grid coordinate +template +static inline scalar_t grid_sampler_compute_source_index( + scalar_t coord, + int64_t size, + int64_t padding_mode, + bool align_corners) { + coord = grid_sampler_unnormalize(coord, size, align_corners); + coord = compute_coordinates(coord, size, padding_mode, align_corners); + return coord; +} + +static inline bool within_bounds_2d(int64_t h, int64_t w, int64_t H, int64_t W) { + return h >= 0 && h < H && w >= 0 && w < W; +} + +template +static inline scalar_t get_value_bounded( + const scalar_t* data, + scalar_t x, + scalar_t y, + int64_t W, + int64_t H, + int64_t sW, + int64_t sH, + int64_t padding_mode, + bool align_corners) { + + x = compute_coordinates(x, W, padding_mode, align_corners); + y = compute_coordinates(y, H, padding_mode, align_corners); + + int64_t ix = static_cast(x); + int64_t iy = static_cast(y); + + if (within_bounds_2d(iy, ix, H, W)) { + return data[iy * sH + ix * sW]; + } + return static_cast(0); +} + +template +static inline scalar_t cubic_convolution1(scalar_t x, scalar_t A) { + return ((A + 2) * x - (A + 3)) * x * x + 1; +} + +template +static inline scalar_t cubic_convolution2(scalar_t x, scalar_t A) { + return ((A * x - 5 * A) * x + 8 * A) * x - 4 * A; +} + +template +static inline void get_cubic_upsample_coefficients( + scalar_t coeffs[4], + scalar_t t) { + scalar_t A = -0.75; + + scalar_t x1 = t; + coeffs[0] = cubic_convolution2(x1 + 1.0, A); + coeffs[1] = cubic_convolution1(x1, A); + + // opposite coefficients + scalar_t x2 = 1.0 - t; + coeffs[2] = cubic_convolution1(x2, A); + coeffs[3] = cubic_convolution2(x2 + 1.0, A); +} + +template +static inline scalar_t cubic_interp1d( + scalar_t x0, + scalar_t x1, + scalar_t x2, + scalar_t x3, + scalar_t t) { + scalar_t coeffs[4]; + get_cubic_upsample_coefficients(coeffs, t); + + return x0 * coeffs[0] + x1 * coeffs[1] + x2 * coeffs[2] + x3 * coeffs[3]; +} + +void GridSampleKernel::Compute(OrtKernelContext* context) { + const bool align_corners = align_corners_; + const int64_t padding_mode = padding_mode_; + const int64_t interpolation_mode = interpolation_mode_; + + const OrtValue* input = ort_.KernelContext_GetInput(context, 0); + const float* input_data = + reinterpret_cast(ort_.GetTensorData(input)); + + const OrtValue* grid = ort_.KernelContext_GetInput(context, 1); + const float* grid_data = + reinterpret_cast(ort_.GetTensorData(grid)); + + OrtTensorDimensions input_dims(ort_, input); + OrtTensorDimensions grid_dims(ort_, grid); + int64_t N = input_dims[0]; + int64_t C = input_dims[1]; + int64_t inp_H = input_dims[2]; + int64_t inp_W = input_dims[3]; + int64_t out_H = grid_dims[1]; + int64_t out_W = grid_dims[2]; + + std::vector output_dims = {N, C, out_H, out_W}; + OrtValue *output = ort_.KernelContext_GetOutput(context, 0, + output_dims.data(), + output_dims.size()); + float* out_ptr = ort_.GetTensorMutableData(output); + + int64_t inp_sN = input_dims[1] * input_dims[2] * input_dims[3]; + int64_t inp_sC = input_dims[2] * input_dims[3]; + int64_t inp_sH = input_dims[3]; + int64_t inp_sW = 1; + int64_t grid_sN = grid_dims[1] * grid_dims[2] * grid_dims[3]; + int64_t grid_sH = grid_dims[2] * grid_dims[3]; + int64_t grid_sW = grid_dims[3]; + int64_t grid_sCoor = 1; + int64_t out_sN = output_dims[1] * output_dims[2] * output_dims[3]; + int64_t out_sC = output_dims[2] * output_dims[3]; + int64_t out_sH = output_dims[3]; + int64_t out_sW = 1; + + // loop over each output pixel + for (int64_t n = 0; n < N; ++n) { + const float* grid_ptr_N = grid_data + n * grid_sN; + const float* inp_ptr_N = input_data + n * inp_sN; + for (int64_t h = 0; h < out_H; ++h) { + for (int64_t w = 0; w < out_W; ++w) { + const float* grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW; + float x = *grid_ptr_NHW; + float y = grid_ptr_NHW[grid_sCoor]; + + float ix = grid_sampler_compute_source_index(x, inp_W, padding_mode, align_corners); + float iy = grid_sampler_compute_source_index(y, inp_H, padding_mode, align_corners); + + if (interpolation_mode == GridSamplerInterpolation::Bilinear) { + // get corner pixel values from (x, y) + // for 4d, we use north-east-south-west + int64_t ix_nw = static_cast(std::floor(ix)); + int64_t iy_nw = static_cast(std::floor(iy)); + + int64_t ix_ne = ix_nw + 1; + int64_t iy_ne = iy_nw; + + int64_t ix_sw = ix_nw; + int64_t iy_sw = iy_nw + 1; + + int64_t ix_se = ix_nw + 1; + int64_t iy_se = iy_nw + 1; + + + // get surfaces to each neighbor: + float nw = (ix_se - ix) * (iy_se - iy); + float ne = (ix - ix_sw) * (iy_sw - iy); + float sw = (ix_ne - ix) * (iy - iy_ne); + float se = (ix - ix_nw) * (iy - iy_nw); + + // calculate bilinear weighted pixel value and set output pixel + const float *inp_ptr_NC = inp_ptr_N; + float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; + for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) { + auto res = static_cast(0); + if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) { + res += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw; + } + if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) { + res += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne; + } + if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) { + res += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw; + } + if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) { + res += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se; + } + *out_ptr_NCHW = res; + } + } else if (interpolation_mode == GridSamplerInterpolation::Nearest) { + int64_t ix_nearest = static_cast(std::nearbyint(ix)); + int64_t iy_nearest = static_cast(std::nearbyint(iy)); + + // assign nearest neighor pixel value to output pixel + float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; + const float *inp_ptr_NC = inp_ptr_N; + for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) { + if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) { + *out_ptr_NCHW = inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW]; + } else { + *out_ptr_NCHW = static_cast(0); + } + } + } else if (interpolation_mode == GridSamplerInterpolation::Bicubic) { + // grid_sampler_compute_source_index will "clip the value" of idx depends on the padding, + // which would cause calculation to be wrong, + // for example x = -0.1 -> ix = 0 for zero padding, but in bicubic ix = floor(x) = -1 + // There would be more problem in reflection padding, since the -1 and +1 direction is not fixed in boundary condition + ix = grid_sampler_unnormalize(x, inp_W, align_corners); + iy = grid_sampler_unnormalize(y, inp_H, align_corners); + + float ix_nw = std::floor(ix); + float iy_nw = std::floor(iy); + + const float tx = ix - ix_nw; + const float ty = iy - iy_nw; + + const float *inp_ptr_NC = inp_ptr_N; + float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; + for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) { + float coefficients[4]; + + // Interpolate 4 values in the x directon + for (int64_t i = 0; i < 4; ++i) { + coefficients[i] = cubic_interp1d( + get_value_bounded(inp_ptr_NC, ix_nw - 1, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), + get_value_bounded(inp_ptr_NC, ix_nw + 0, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), + get_value_bounded(inp_ptr_NC, ix_nw + 1, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), + get_value_bounded(inp_ptr_NC, ix_nw + 2, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), + tx); + } + + // Interpolate in the y direction + *out_ptr_NCHW = cubic_interp1d(coefficients[0], coefficients[1], coefficients[2], coefficients[3], ty); + } + } + } + } + } +} diff --git a/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp b/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp index 94614c85574..06196b8110f 100644 --- a/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp +++ b/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp @@ -4,11 +4,13 @@ #include "ort_mmcv_utils.h" #include "roi_align.h" #include "soft_nms.h" +#include "grid_sample.h" const char *c_MMCVOpDomain = "mmcv"; SoftNmsOp c_SoftNmsOp; NmsOp c_NmsOp; MMCVRoiAlignCustomOp c_MMCVRoiAlignCustomOp; +GridSampleOp c_GridSampleOp; OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options, const OrtApiBase *api) { @@ -32,5 +34,10 @@ OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options, return status; } + if (auto status = + ortApi->CustomOpDomain_Add(domain, &c_GridSampleOp)) { + return status; + } + return ortApi->AddCustomOpDomain(options, domain); } diff --git a/mmcv/ops/csrc/onnxruntime/grid_sample.h b/mmcv/ops/csrc/onnxruntime/grid_sample.h new file mode 100644 index 00000000000..b10555cf13d --- /dev/null +++ b/mmcv/ops/csrc/onnxruntime/grid_sample.h @@ -0,0 +1,43 @@ +#ifndef ONNXRUNTIME_GRIDSAMPLE_H +#define ONNXRUNTIME_GRIDSAMPLE_H + +#include + +struct GridSampleKernel { + GridSampleKernel(OrtApi api, const OrtKernelInfo *info); + + void Compute(OrtKernelContext *context); + + protected: + OrtApi api_; + Ort::CustomOpApi ort_; + const OrtKernelInfo *info_; + Ort::AllocatorWithDefaultOptions allocator_; + + int64_t align_corners_; + int64_t interpolation_mode_; + int64_t padding_mode_; +}; + +struct GridSampleOp : Ort::CustomOpBase { + void* CreateKernel(OrtApi api, const OrtKernelInfo *info) const { + return new GridSampleKernel(api, info); + }; + + const char* GetName() const { return "grid_sampler"; }; + + size_t GetInputTypeCount() const { return 2; }; + ONNXTensorElementDataType GetInputType(size_t /*index*/) const { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + }; + + size_t GetOutputTypeCount() const { return 1; }; + ONNXTensorElementDataType GetOutputType(size_t /*index*/) const { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + }; + + const char* GetExecutionProviderType() const { + return "CPUExecutionProvider"; + }; +}; +#endif diff --git a/tests/test_ops/test_onnx.py b/tests/test_ops/test_onnx.py index cc1ccb82387..4faf1f95a2d 100644 --- a/tests/test_ops/test_onnx.py +++ b/tests/test_ops/test_onnx.py @@ -23,6 +23,44 @@ def forward(self, *args, **kwargs): return self.wrapped_function(*args, **kwargs) +class GridSample(torch.nn.Module): + + def forward(self, x, y): + res = torch.nn.functional.grid_sample(x, grid=y, align_corners=False) + return res + + +def test_grid_sampler(): + from mmcv.onnx.symbolic import register_extra_symbolics + register_extra_symbolics() + input = torch.ones(1, 1, 2, 2) + out_h = 4 + out_w = 4 + h = torch.linspace(-1, 1, out_h) + w = torch.linspace(-1, 1, out_w) + grid = torch.zeros(out_h, out_w, 2) + grid[:, :, 0] = w.unsqueeze(0).repeat(out_h, 1) + grid[:, :, 1] = h.unsqueeze(0).repeat(out_w, 1).transpose(0, 1) + grid = grid.unsqueeze(0).repeat(1, 1, 1, 1) + + model = GridSample() + torch.onnx.export(model, (input, grid), onnx_file, opset_version=11) + + pytorch_output = model(input, grid) + + from mmcv.ops import get_onnxruntime_op_path + ort_custom_op_path = get_onnxruntime_op_path() + session_options = rt.SessionOptions() + if os.path.exists(ort_custom_op_path): + session_options.register_custom_ops_library(ort_custom_op_path) + sess = rt.InferenceSession(onnx_file, session_options) + input_feature = input.cpu().numpy() + grid_feature = grid.cpu().numpy() + onnx_output = sess.run(None, {'x': input_feature, 'y': grid_feature}) + os.remove(onnx_file) + assert np.allclose(pytorch_output, onnx_output, atol=1e-3) + + def test_nms(): if torch.__version__ == 'parrots': pytest.skip('onnx is not supported in parrots directly') From c32a838efb817d0332c5f3109b267e1c7e4bc197 Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Wed, 31 Mar 2021 10:44:19 +0800 Subject: [PATCH 02/15] update code --- mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp | 305 +++++++++++-------- 1 file changed, 177 insertions(+), 128 deletions(-) diff --git a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp index fe30f2a49a5..10c9eebb21d 100644 --- a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp +++ b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp @@ -4,75 +4,97 @@ #include -#define MIN(a,b) (((a)<(b))?(a):(b)) -#define MAX(a,b) (((a)<(b))?(b):(a)) -#define CLIP_COORDINATES(in, out, clip_limit) out=MIN((clip_limit-1), MAX(in,0)) - -GridSampleKernel::GridSampleKernel(OrtApi api, const OrtKernelInfo* info) - : api_(api), ort_(api_), info_(info) { - align_corners_ = ort_.KernelInfoGetAttribute(info, "align_corners"); - interpolation_mode_ = ort_.KernelInfoGetAttribute(info, "interpolation_mode"); - padding_mode_ = ort_.KernelInfoGetAttribute(info, "padding_mode"); - - allocator_ = Ort::AllocatorWithDefaultOptions(); +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MAX(a, b) (((a) < (b)) ? (b) : (a)) +#define CLIP_COORDINATES(in, out, clip_limit) out = MIN((clip_limit - 1), MAX(in, 0)) + +GridSampleKernel::GridSampleKernel(OrtApi api, const OrtKernelInfo *info) + : api_(api), ort_(api_), info_(info) +{ + align_corners_ = ort_.KernelInfoGetAttribute(info, "align_corners"); + interpolation_mode_ = ort_.KernelInfoGetAttribute(info, "interpolation_mode"); + padding_mode_ = ort_.KernelInfoGetAttribute(info, "padding_mode"); + + allocator_ = Ort::AllocatorWithDefaultOptions(); } -enum GridSamplerInterpolation { +enum GridSamplerInterpolation +{ Bilinear = 0, Nearest = 1, - Bicubic = 2}; -enum GridSamplerPadding { + Bicubic = 2 +}; +enum GridSamplerPadding +{ Zeros = 0, Border = 1, - Reflection = 2}; + Reflection = 2 +}; -template -static inline scalar_t grid_sampler_unnormalize(scalar_t coord, int64_t size, bool align_corners) { - if (align_corners) { +template +static inline scalar_t grid_sampler_unnormalize(scalar_t coord, int64_t size, bool align_corners) +{ + if (align_corners) + { return ((coord + 1) / 2) * (size - 1); - } else { + } + else + { return ((coord + 1) * size - 1) / 2; } } // Clips coordinates to between 0 and clip_limit - 1 -template -static inline scalar_t clip_coordinates(scalar_t in, int64_t clip_limit) { - return std::min(static_cast(clip_limit - 1), std::max(in, static_cast(0))); +template +static inline scalar_t clip_coordinates(scalar_t in, int64_t clip_limit) +{ + return std::min(static_cast(clip_limit - 1), std::max(in, static_cast(0))); } // Reflects coordinates until they fall between low and high (inclusive). // The bounds are passed as twice their value so that half-integer values // can be represented as ints. -template +template static inline scalar_t reflect_coordinates(scalar_t in, int64_t twice_low, - int64_t twice_high) { - if (twice_low == twice_high) { - return static_cast(0); - } - scalar_t min = static_cast(twice_low) / 2; - scalar_t span = static_cast(twice_high - twice_low) / 2; - in = std::fabs(in - min); - // `fmod` returns same sign as `in`, which is positive after the `fabs` above. - scalar_t extra = std::fmod(in, span); - int flips = static_cast(std::floor(in / span)); - if (flips % 2 == 0) { - return extra + min; - } else { - return span - extra + min; - } + int64_t twice_high) +{ + if (twice_low == twice_high) + { + return static_cast(0); + } + scalar_t min = static_cast(twice_low) / 2; + scalar_t span = static_cast(twice_high - twice_low) / 2; + in = std::fabs(in - min); + // `fmod` returns same sign as `in`, which is positive after the `fabs` above. + scalar_t extra = std::fmod(in, span); + int flips = static_cast(std::floor(in / span)); + if (flips % 2 == 0) + { + return extra + min; + } + else + { + return span - extra + min; + } } -template +template static inline scalar_t compute_coordinates(scalar_t coord, int64_t size, int64_t padding_mode, - bool align_corners) { - if (padding_mode == GridSamplerPadding::Border) { + bool align_corners) +{ + if (padding_mode == GridSamplerPadding::Border) + { coord = clip_coordinates(coord, size); - } else if (padding_mode == GridSamplerPadding::Reflection) { - if (align_corners) { - coord = reflect_coordinates(coord, 0, 2*(size - 1)); - } else { + } + else if (padding_mode == GridSamplerPadding::Reflection) + { + if (align_corners) + { + coord = reflect_coordinates(coord, 0, 2 * (size - 1)); + } + else + { coord = reflect_coordinates(coord, -1, 2 * size - 1); } coord = clip_coordinates(coord, size); @@ -86,19 +108,21 @@ static inline scalar_t grid_sampler_compute_source_index( scalar_t coord, int64_t size, int64_t padding_mode, - bool align_corners) { - coord = grid_sampler_unnormalize(coord, size, align_corners); - coord = compute_coordinates(coord, size, padding_mode, align_corners); - return coord; + bool align_corners) +{ + coord = grid_sampler_unnormalize(coord, size, align_corners); + coord = compute_coordinates(coord, size, padding_mode, align_corners); + return coord; } -static inline bool within_bounds_2d(int64_t h, int64_t w, int64_t H, int64_t W) { - return h >= 0 && h < H && w >= 0 && w < W; +static inline bool within_bounds_2d(int64_t h, int64_t w, int64_t H, int64_t W) +{ + return h >= 0 && h < H && w >= 0 && w < W; } -template +template static inline scalar_t get_value_bounded( - const scalar_t* data, + const scalar_t *data, scalar_t x, scalar_t y, int64_t W, @@ -106,44 +130,49 @@ static inline scalar_t get_value_bounded( int64_t sW, int64_t sH, int64_t padding_mode, - bool align_corners) { + bool align_corners) +{ - x = compute_coordinates(x, W, padding_mode, align_corners); - y = compute_coordinates(y, H, padding_mode, align_corners); + x = compute_coordinates(x, W, padding_mode, align_corners); + y = compute_coordinates(y, H, padding_mode, align_corners); - int64_t ix = static_cast(x); - int64_t iy = static_cast(y); + int64_t ix = static_cast(x); + int64_t iy = static_cast(y); - if (within_bounds_2d(iy, ix, H, W)) { - return data[iy * sH + ix * sW]; - } - return static_cast(0); + if (within_bounds_2d(iy, ix, H, W)) + { + return data[iy * sH + ix * sW]; + } + return static_cast(0); } template -static inline scalar_t cubic_convolution1(scalar_t x, scalar_t A) { - return ((A + 2) * x - (A + 3)) * x * x + 1; +static inline scalar_t cubic_convolution1(scalar_t x, scalar_t A) +{ + return ((A + 2) * x - (A + 3)) * x * x + 1; } template -static inline scalar_t cubic_convolution2(scalar_t x, scalar_t A) { - return ((A * x - 5 * A) * x + 8 * A) * x - 4 * A; +static inline scalar_t cubic_convolution2(scalar_t x, scalar_t A) +{ + return ((A * x - 5 * A) * x + 8 * A) * x - 4 * A; } template static inline void get_cubic_upsample_coefficients( scalar_t coeffs[4], - scalar_t t) { - scalar_t A = -0.75; - - scalar_t x1 = t; - coeffs[0] = cubic_convolution2(x1 + 1.0, A); - coeffs[1] = cubic_convolution1(x1, A); - - // opposite coefficients - scalar_t x2 = 1.0 - t; - coeffs[2] = cubic_convolution1(x2, A); - coeffs[3] = cubic_convolution2(x2 + 1.0, A); + scalar_t t) +{ + scalar_t A = -0.75; + + scalar_t x1 = t; + coeffs[0] = cubic_convolution2(x1 + 1.0, A); + coeffs[1] = cubic_convolution1(x1, A); + + // opposite coefficients + scalar_t x2 = 1.0 - t; + coeffs[2] = cubic_convolution1(x2, A); + coeffs[3] = cubic_convolution2(x2 + 1.0, A); } template @@ -152,25 +181,27 @@ static inline scalar_t cubic_interp1d( scalar_t x1, scalar_t x2, scalar_t x3, - scalar_t t) { - scalar_t coeffs[4]; - get_cubic_upsample_coefficients(coeffs, t); + scalar_t t) +{ + scalar_t coeffs[4]; + get_cubic_upsample_coefficients(coeffs, t); - return x0 * coeffs[0] + x1 * coeffs[1] + x2 * coeffs[2] + x3 * coeffs[3]; + return x0 * coeffs[0] + x1 * coeffs[1] + x2 * coeffs[2] + x3 * coeffs[3]; } -void GridSampleKernel::Compute(OrtKernelContext* context) { +void GridSampleKernel::Compute(OrtKernelContext *context) +{ const bool align_corners = align_corners_; const int64_t padding_mode = padding_mode_; const int64_t interpolation_mode = interpolation_mode_; - const OrtValue* input = ort_.KernelContext_GetInput(context, 0); - const float* input_data = - reinterpret_cast(ort_.GetTensorData(input)); + const OrtValue *input = ort_.KernelContext_GetInput(context, 0); + const float *input_data = + reinterpret_cast(ort_.GetTensorData(input)); - const OrtValue* grid = ort_.KernelContext_GetInput(context, 1); - const float* grid_data = - reinterpret_cast(ort_.GetTensorData(grid)); + const OrtValue *grid = ort_.KernelContext_GetInput(context, 1); + const float *grid_data = + reinterpret_cast(ort_.GetTensorData(grid)); OrtTensorDimensions input_dims(ort_, input); OrtTensorDimensions grid_dims(ort_, grid); @@ -185,7 +216,7 @@ void GridSampleKernel::Compute(OrtKernelContext* context) { OrtValue *output = ort_.KernelContext_GetOutput(context, 0, output_dims.data(), output_dims.size()); - float* out_ptr = ort_.GetTensorMutableData(output); + float *out_ptr = ort_.GetTensorMutableData(output); int64_t inp_sN = input_dims[1] * input_dims[2] * input_dims[3]; int64_t inp_sC = input_dims[2] * input_dims[3]; @@ -201,19 +232,23 @@ void GridSampleKernel::Compute(OrtKernelContext* context) { int64_t out_sW = 1; // loop over each output pixel - for (int64_t n = 0; n < N; ++n) { - const float* grid_ptr_N = grid_data + n * grid_sN; - const float* inp_ptr_N = input_data + n * inp_sN; - for (int64_t h = 0; h < out_H; ++h) { - for (int64_t w = 0; w < out_W; ++w) { - const float* grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW; + for (int64_t n = 0; n < N; ++n) + { + const float *grid_ptr_N = grid_data + n * grid_sN; + const float *inp_ptr_N = input_data + n * inp_sN; + for (int64_t h = 0; h < out_H; ++h) + { + for (int64_t w = 0; w < out_W; ++w) + { + const float *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW; float x = *grid_ptr_NHW; float y = grid_ptr_NHW[grid_sCoor]; float ix = grid_sampler_compute_source_index(x, inp_W, padding_mode, align_corners); float iy = grid_sampler_compute_source_index(y, inp_H, padding_mode, align_corners); - if (interpolation_mode == GridSamplerInterpolation::Bilinear) { + if (interpolation_mode == GridSamplerInterpolation::Bilinear) + { // get corner pixel values from (x, y) // for 4d, we use north-east-south-west int64_t ix_nw = static_cast(std::floor(ix)); @@ -228,47 +263,59 @@ void GridSampleKernel::Compute(OrtKernelContext* context) { int64_t ix_se = ix_nw + 1; int64_t iy_se = iy_nw + 1; - // get surfaces to each neighbor: - float nw = (ix_se - ix) * (iy_se - iy); - float ne = (ix - ix_sw) * (iy_sw - iy); - float sw = (ix_ne - ix) * (iy - iy_ne); - float se = (ix - ix_nw) * (iy - iy_nw); + float nw = (ix_se - ix) * (iy_se - iy); + float ne = (ix - ix_sw) * (iy_sw - iy); + float sw = (ix_ne - ix) * (iy - iy_ne); + float se = (ix - ix_nw) * (iy - iy_nw); // calculate bilinear weighted pixel value and set output pixel const float *inp_ptr_NC = inp_ptr_N; float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; - for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) { + for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) + { auto res = static_cast(0); - if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) { + if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) + { res += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw; } - if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) { + if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) + { res += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne; } - if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) { + if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) + { res += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw; } - if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) { + if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) + { res += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se; } *out_ptr_NCHW = res; } - } else if (interpolation_mode == GridSamplerInterpolation::Nearest) { - int64_t ix_nearest = static_cast(std::nearbyint(ix)); - int64_t iy_nearest = static_cast(std::nearbyint(iy)); - - // assign nearest neighor pixel value to output pixel - float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; - const float *inp_ptr_NC = inp_ptr_N; - for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) { - if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) { - *out_ptr_NCHW = inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW]; - } else { - *out_ptr_NCHW = static_cast(0); - } + } + else if (interpolation_mode == GridSamplerInterpolation::Nearest) + { + int64_t ix_nearest = static_cast(std::nearbyint(ix)); + int64_t iy_nearest = static_cast(std::nearbyint(iy)); + + // assign nearest neighor pixel value to output pixel + float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; + const float *inp_ptr_NC = inp_ptr_N; + for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) + { + if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) + { + *out_ptr_NCHW = inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW]; } - } else if (interpolation_mode == GridSamplerInterpolation::Bicubic) { + else + { + *out_ptr_NCHW = static_cast(0); + } + } + } + else if (interpolation_mode == GridSamplerInterpolation::Bicubic) + { // grid_sampler_compute_source_index will "clip the value" of idx depends on the padding, // which would cause calculation to be wrong, // for example x = -0.1 -> ix = 0 for zero padding, but in bicubic ix = floor(x) = -1 @@ -284,17 +331,19 @@ void GridSampleKernel::Compute(OrtKernelContext* context) { const float *inp_ptr_NC = inp_ptr_N; float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; - for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) { + for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) + { float coefficients[4]; // Interpolate 4 values in the x directon - for (int64_t i = 0; i < 4; ++i) { - coefficients[i] = cubic_interp1d( - get_value_bounded(inp_ptr_NC, ix_nw - 1, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), - get_value_bounded(inp_ptr_NC, ix_nw + 0, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), - get_value_bounded(inp_ptr_NC, ix_nw + 1, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), - get_value_bounded(inp_ptr_NC, ix_nw + 2, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), - tx); + for (int64_t i = 0; i < 4; ++i) + { + coefficients[i] = cubic_interp1d( + get_value_bounded(inp_ptr_NC, ix_nw - 1, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), + get_value_bounded(inp_ptr_NC, ix_nw + 0, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), + get_value_bounded(inp_ptr_NC, ix_nw + 1, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), + get_value_bounded(inp_ptr_NC, ix_nw + 2, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), + tx); } // Interpolate in the y direction From 2be9478a371ef3ffbf4c5bef78f6816973900061 Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Wed, 31 Mar 2021 11:05:53 +0800 Subject: [PATCH 03/15] update code --- mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp | 548 +++++++++---------- 1 file changed, 251 insertions(+), 297 deletions(-) diff --git a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp index 10c9eebb21d..458ec16e3e6 100644 --- a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp +++ b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp @@ -6,49 +6,37 @@ #define MIN(a, b) (((a) < (b)) ? (a) : (b)) #define MAX(a, b) (((a) < (b)) ? (b) : (a)) -#define CLIP_COORDINATES(in, out, clip_limit) out = MIN((clip_limit - 1), MAX(in, 0)) +#define CLIP_COORDINATES(in, out, clip_limit) \ + out = MIN((clip_limit - 1), MAX(in, 0)) GridSampleKernel::GridSampleKernel(OrtApi api, const OrtKernelInfo *info) - : api_(api), ort_(api_), info_(info) -{ - align_corners_ = ort_.KernelInfoGetAttribute(info, "align_corners"); - interpolation_mode_ = ort_.KernelInfoGetAttribute(info, "interpolation_mode"); - padding_mode_ = ort_.KernelInfoGetAttribute(info, "padding_mode"); + : api_(api), ort_(api_), info_(info) { + align_corners_ = ort_.KernelInfoGetAttribute(info, "align_corners"); + interpolation_mode_ = + ort_.KernelInfoGetAttribute(info, "interpolation_mode"); + padding_mode_ = ort_.KernelInfoGetAttribute(info, "padding_mode"); - allocator_ = Ort::AllocatorWithDefaultOptions(); + allocator_ = Ort::AllocatorWithDefaultOptions(); } -enum GridSamplerInterpolation -{ - Bilinear = 0, - Nearest = 1, - Bicubic = 2 -}; -enum GridSamplerPadding -{ - Zeros = 0, - Border = 1, - Reflection = 2 -}; +enum GridSamplerInterpolation { Bilinear = 0, Nearest = 1, Bicubic = 2 }; +enum GridSamplerPadding { Zeros = 0, Border = 1, Reflection = 2 }; template -static inline scalar_t grid_sampler_unnormalize(scalar_t coord, int64_t size, bool align_corners) -{ - if (align_corners) - { - return ((coord + 1) / 2) * (size - 1); - } - else - { - return ((coord + 1) * size - 1) / 2; - } +static inline scalar_t grid_sampler_unnormalize(scalar_t coord, int64_t size, + bool align_corners) { + if (align_corners) { + return ((coord + 1) / 2) * (size - 1); + } else { + return ((coord + 1) * size - 1) / 2; + } } // Clips coordinates to between 0 and clip_limit - 1 template -static inline scalar_t clip_coordinates(scalar_t in, int64_t clip_limit) -{ - return std::min(static_cast(clip_limit - 1), std::max(in, static_cast(0))); +static inline scalar_t clip_coordinates(scalar_t in, int64_t clip_limit) { + return std::min(static_cast(clip_limit - 1), + std::max(in, static_cast(0))); } // Reflects coordinates until they fall between low and high (inclusive). @@ -56,301 +44,267 @@ static inline scalar_t clip_coordinates(scalar_t in, int64_t clip_limit) // can be represented as ints. template static inline scalar_t reflect_coordinates(scalar_t in, int64_t twice_low, - int64_t twice_high) -{ - if (twice_low == twice_high) - { - return static_cast(0); - } - scalar_t min = static_cast(twice_low) / 2; - scalar_t span = static_cast(twice_high - twice_low) / 2; - in = std::fabs(in - min); - // `fmod` returns same sign as `in`, which is positive after the `fabs` above. - scalar_t extra = std::fmod(in, span); - int flips = static_cast(std::floor(in / span)); - if (flips % 2 == 0) - { - return extra + min; - } - else - { - return span - extra + min; - } + int64_t twice_high) { + if (twice_low == twice_high) { + return static_cast(0); + } + scalar_t min = static_cast(twice_low) / 2; + scalar_t span = static_cast(twice_high - twice_low) / 2; + in = std::fabs(in - min); + // `fmod` returns same sign as `in`, which is positive after the `fabs` above. + scalar_t extra = std::fmod(in, span); + int flips = static_cast(std::floor(in / span)); + if (flips % 2 == 0) { + return extra + min; + } else { + return span - extra + min; + } } template static inline scalar_t compute_coordinates(scalar_t coord, int64_t size, int64_t padding_mode, - bool align_corners) -{ - if (padding_mode == GridSamplerPadding::Border) - { - coord = clip_coordinates(coord, size); + bool align_corners) { + if (padding_mode == GridSamplerPadding::Border) { + coord = clip_coordinates(coord, size); + } else if (padding_mode == GridSamplerPadding::Reflection) { + if (align_corners) { + coord = reflect_coordinates(coord, 0, 2 * (size - 1)); + } else { + coord = reflect_coordinates(coord, -1, 2 * size - 1); } - else if (padding_mode == GridSamplerPadding::Reflection) - { - if (align_corners) - { - coord = reflect_coordinates(coord, 0, 2 * (size - 1)); - } - else - { - coord = reflect_coordinates(coord, -1, 2 * size - 1); - } - coord = clip_coordinates(coord, size); - } - return coord; + coord = clip_coordinates(coord, size); + } + return coord; } // Computes the pixel source index value for a grid coordinate template -static inline scalar_t grid_sampler_compute_source_index( - scalar_t coord, - int64_t size, - int64_t padding_mode, - bool align_corners) -{ - coord = grid_sampler_unnormalize(coord, size, align_corners); - coord = compute_coordinates(coord, size, padding_mode, align_corners); - return coord; +static inline scalar_t +grid_sampler_compute_source_index(scalar_t coord, int64_t size, + int64_t padding_mode, bool align_corners) { + coord = grid_sampler_unnormalize(coord, size, align_corners); + coord = compute_coordinates(coord, size, padding_mode, align_corners); + return coord; } -static inline bool within_bounds_2d(int64_t h, int64_t w, int64_t H, int64_t W) -{ - return h >= 0 && h < H && w >= 0 && w < W; +static inline bool within_bounds_2d(int64_t h, int64_t w, int64_t H, + int64_t W) { + return h >= 0 && h < H && w >= 0 && w < W; } template -static inline scalar_t get_value_bounded( - const scalar_t *data, - scalar_t x, - scalar_t y, - int64_t W, - int64_t H, - int64_t sW, - int64_t sH, - int64_t padding_mode, - bool align_corners) -{ - - x = compute_coordinates(x, W, padding_mode, align_corners); - y = compute_coordinates(y, H, padding_mode, align_corners); - - int64_t ix = static_cast(x); - int64_t iy = static_cast(y); - - if (within_bounds_2d(iy, ix, H, W)) - { - return data[iy * sH + ix * sW]; - } - return static_cast(0); +static inline scalar_t +get_value_bounded(const scalar_t *data, scalar_t x, scalar_t y, int64_t W, + int64_t H, int64_t sW, int64_t sH, int64_t padding_mode, + bool align_corners) { + + x = compute_coordinates(x, W, padding_mode, align_corners); + y = compute_coordinates(y, H, padding_mode, align_corners); + + int64_t ix = static_cast(x); + int64_t iy = static_cast(y); + + if (within_bounds_2d(iy, ix, H, W)) { + return data[iy * sH + ix * sW]; + } + return static_cast(0); } template -static inline scalar_t cubic_convolution1(scalar_t x, scalar_t A) -{ - return ((A + 2) * x - (A + 3)) * x * x + 1; +static inline scalar_t cubic_convolution1(scalar_t x, scalar_t A) { + return ((A + 2) * x - (A + 3)) * x * x + 1; } template -static inline scalar_t cubic_convolution2(scalar_t x, scalar_t A) -{ - return ((A * x - 5 * A) * x + 8 * A) * x - 4 * A; +static inline scalar_t cubic_convolution2(scalar_t x, scalar_t A) { + return ((A * x - 5 * A) * x + 8 * A) * x - 4 * A; } template -static inline void get_cubic_upsample_coefficients( - scalar_t coeffs[4], - scalar_t t) -{ - scalar_t A = -0.75; - - scalar_t x1 = t; - coeffs[0] = cubic_convolution2(x1 + 1.0, A); - coeffs[1] = cubic_convolution1(x1, A); - - // opposite coefficients - scalar_t x2 = 1.0 - t; - coeffs[2] = cubic_convolution1(x2, A); - coeffs[3] = cubic_convolution2(x2 + 1.0, A); +static inline void get_cubic_upsample_coefficients(scalar_t coeffs[4], + scalar_t t) { + scalar_t A = -0.75; + + scalar_t x1 = t; + coeffs[0] = cubic_convolution2(x1 + 1.0, A); + coeffs[1] = cubic_convolution1(x1, A); + + // opposite coefficients + scalar_t x2 = 1.0 - t; + coeffs[2] = cubic_convolution1(x2, A); + coeffs[3] = cubic_convolution2(x2 + 1.0, A); } template -static inline scalar_t cubic_interp1d( - scalar_t x0, - scalar_t x1, - scalar_t x2, - scalar_t x3, - scalar_t t) -{ - scalar_t coeffs[4]; - get_cubic_upsample_coefficients(coeffs, t); - - return x0 * coeffs[0] + x1 * coeffs[1] + x2 * coeffs[2] + x3 * coeffs[3]; +static inline scalar_t cubic_interp1d(scalar_t x0, scalar_t x1, scalar_t x2, + scalar_t x3, scalar_t t) { + scalar_t coeffs[4]; + get_cubic_upsample_coefficients(coeffs, t); + + return x0 * coeffs[0] + x1 * coeffs[1] + x2 * coeffs[2] + x3 * coeffs[3]; } -void GridSampleKernel::Compute(OrtKernelContext *context) -{ - const bool align_corners = align_corners_; - const int64_t padding_mode = padding_mode_; - const int64_t interpolation_mode = interpolation_mode_; - - const OrtValue *input = ort_.KernelContext_GetInput(context, 0); - const float *input_data = - reinterpret_cast(ort_.GetTensorData(input)); - - const OrtValue *grid = ort_.KernelContext_GetInput(context, 1); - const float *grid_data = - reinterpret_cast(ort_.GetTensorData(grid)); - - OrtTensorDimensions input_dims(ort_, input); - OrtTensorDimensions grid_dims(ort_, grid); - int64_t N = input_dims[0]; - int64_t C = input_dims[1]; - int64_t inp_H = input_dims[2]; - int64_t inp_W = input_dims[3]; - int64_t out_H = grid_dims[1]; - int64_t out_W = grid_dims[2]; - - std::vector output_dims = {N, C, out_H, out_W}; - OrtValue *output = ort_.KernelContext_GetOutput(context, 0, - output_dims.data(), - output_dims.size()); - float *out_ptr = ort_.GetTensorMutableData(output); - - int64_t inp_sN = input_dims[1] * input_dims[2] * input_dims[3]; - int64_t inp_sC = input_dims[2] * input_dims[3]; - int64_t inp_sH = input_dims[3]; - int64_t inp_sW = 1; - int64_t grid_sN = grid_dims[1] * grid_dims[2] * grid_dims[3]; - int64_t grid_sH = grid_dims[2] * grid_dims[3]; - int64_t grid_sW = grid_dims[3]; - int64_t grid_sCoor = 1; - int64_t out_sN = output_dims[1] * output_dims[2] * output_dims[3]; - int64_t out_sC = output_dims[2] * output_dims[3]; - int64_t out_sH = output_dims[3]; - int64_t out_sW = 1; - - // loop over each output pixel - for (int64_t n = 0; n < N; ++n) - { - const float *grid_ptr_N = grid_data + n * grid_sN; - const float *inp_ptr_N = input_data + n * inp_sN; - for (int64_t h = 0; h < out_H; ++h) - { - for (int64_t w = 0; w < out_W; ++w) - { - const float *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW; - float x = *grid_ptr_NHW; - float y = grid_ptr_NHW[grid_sCoor]; - - float ix = grid_sampler_compute_source_index(x, inp_W, padding_mode, align_corners); - float iy = grid_sampler_compute_source_index(y, inp_H, padding_mode, align_corners); - - if (interpolation_mode == GridSamplerInterpolation::Bilinear) - { - // get corner pixel values from (x, y) - // for 4d, we use north-east-south-west - int64_t ix_nw = static_cast(std::floor(ix)); - int64_t iy_nw = static_cast(std::floor(iy)); - - int64_t ix_ne = ix_nw + 1; - int64_t iy_ne = iy_nw; - - int64_t ix_sw = ix_nw; - int64_t iy_sw = iy_nw + 1; - - int64_t ix_se = ix_nw + 1; - int64_t iy_se = iy_nw + 1; - - // get surfaces to each neighbor: - float nw = (ix_se - ix) * (iy_se - iy); - float ne = (ix - ix_sw) * (iy_sw - iy); - float sw = (ix_ne - ix) * (iy - iy_ne); - float se = (ix - ix_nw) * (iy - iy_nw); - - // calculate bilinear weighted pixel value and set output pixel - const float *inp_ptr_NC = inp_ptr_N; - float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; - for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) - { - auto res = static_cast(0); - if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) - { - res += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw; - } - if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) - { - res += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne; - } - if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) - { - res += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw; - } - if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) - { - res += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se; - } - *out_ptr_NCHW = res; - } - } - else if (interpolation_mode == GridSamplerInterpolation::Nearest) - { - int64_t ix_nearest = static_cast(std::nearbyint(ix)); - int64_t iy_nearest = static_cast(std::nearbyint(iy)); - - // assign nearest neighor pixel value to output pixel - float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; - const float *inp_ptr_NC = inp_ptr_N; - for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) - { - if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) - { - *out_ptr_NCHW = inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW]; - } - else - { - *out_ptr_NCHW = static_cast(0); - } - } - } - else if (interpolation_mode == GridSamplerInterpolation::Bicubic) - { - // grid_sampler_compute_source_index will "clip the value" of idx depends on the padding, - // which would cause calculation to be wrong, - // for example x = -0.1 -> ix = 0 for zero padding, but in bicubic ix = floor(x) = -1 - // There would be more problem in reflection padding, since the -1 and +1 direction is not fixed in boundary condition - ix = grid_sampler_unnormalize(x, inp_W, align_corners); - iy = grid_sampler_unnormalize(y, inp_H, align_corners); - - float ix_nw = std::floor(ix); - float iy_nw = std::floor(iy); - - const float tx = ix - ix_nw; - const float ty = iy - iy_nw; - - const float *inp_ptr_NC = inp_ptr_N; - float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; - for (int64_t c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) - { - float coefficients[4]; - - // Interpolate 4 values in the x directon - for (int64_t i = 0; i < 4; ++i) - { - coefficients[i] = cubic_interp1d( - get_value_bounded(inp_ptr_NC, ix_nw - 1, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), - get_value_bounded(inp_ptr_NC, ix_nw + 0, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), - get_value_bounded(inp_ptr_NC, ix_nw + 1, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), - get_value_bounded(inp_ptr_NC, ix_nw + 2, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners), - tx); - } - - // Interpolate in the y direction - *out_ptr_NCHW = cubic_interp1d(coefficients[0], coefficients[1], coefficients[2], coefficients[3], ty); - } - } +void GridSampleKernel::Compute(OrtKernelContext *context) { + const bool align_corners = align_corners_; + const int64_t padding_mode = padding_mode_; + const int64_t interpolation_mode = interpolation_mode_; + + const OrtValue *input = ort_.KernelContext_GetInput(context, 0); + const float *input_data = + reinterpret_cast(ort_.GetTensorData(input)); + + const OrtValue *grid = ort_.KernelContext_GetInput(context, 1); + const float *grid_data = + reinterpret_cast(ort_.GetTensorData(grid)); + + OrtTensorDimensions input_dims(ort_, input); + OrtTensorDimensions grid_dims(ort_, grid); + int64_t N = input_dims[0]; + int64_t C = input_dims[1]; + int64_t inp_H = input_dims[2]; + int64_t inp_W = input_dims[3]; + int64_t out_H = grid_dims[1]; + int64_t out_W = grid_dims[2]; + + std::vector output_dims = {N, C, out_H, out_W}; + OrtValue *output = ort_.KernelContext_GetOutput( + context, 0, output_dims.data(), output_dims.size()); + float *out_ptr = ort_.GetTensorMutableData(output); + + int64_t inp_sN = input_dims[1] * input_dims[2] * input_dims[3]; + int64_t inp_sC = input_dims[2] * input_dims[3]; + int64_t inp_sH = input_dims[3]; + int64_t inp_sW = 1; + int64_t grid_sN = grid_dims[1] * grid_dims[2] * grid_dims[3]; + int64_t grid_sH = grid_dims[2] * grid_dims[3]; + int64_t grid_sW = grid_dims[3]; + int64_t grid_sCoor = 1; + int64_t out_sN = output_dims[1] * output_dims[2] * output_dims[3]; + int64_t out_sC = output_dims[2] * output_dims[3]; + int64_t out_sH = output_dims[3]; + int64_t out_sW = 1; + + // loop over each output pixel + for (int64_t n = 0; n < N; ++n) { + const float *grid_ptr_N = grid_data + n * grid_sN; + const float *inp_ptr_N = input_data + n * inp_sN; + for (int64_t h = 0; h < out_H; ++h) { + for (int64_t w = 0; w < out_W; ++w) { + const float *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW; + float x = *grid_ptr_NHW; + float y = grid_ptr_NHW[grid_sCoor]; + + float ix = grid_sampler_compute_source_index(x, inp_W, padding_mode, + align_corners); + float iy = grid_sampler_compute_source_index(y, inp_H, padding_mode, + align_corners); + + if (interpolation_mode == GridSamplerInterpolation::Bilinear) { + // get corner pixel values from (x, y) + // for 4d, we use north-east-south-west + int64_t ix_nw = static_cast(std::floor(ix)); + int64_t iy_nw = static_cast(std::floor(iy)); + + int64_t ix_ne = ix_nw + 1; + int64_t iy_ne = iy_nw; + + int64_t ix_sw = ix_nw; + int64_t iy_sw = iy_nw + 1; + + int64_t ix_se = ix_nw + 1; + int64_t iy_se = iy_nw + 1; + + // get surfaces to each neighbor: + float nw = (ix_se - ix) * (iy_se - iy); + float ne = (ix - ix_sw) * (iy_sw - iy); + float sw = (ix_ne - ix) * (iy - iy_ne); + float se = (ix - ix_nw) * (iy - iy_nw); + + // calculate bilinear weighted pixel value and set output pixel + const float *inp_ptr_NC = inp_ptr_N; + float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; + for (int64_t c = 0; c < C; + ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) { + auto res = static_cast(0); + if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) { + res += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw; + } + if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) { + res += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne; } + if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) { + res += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw; + } + if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) { + res += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se; + } + *out_ptr_NCHW = res; + } + } else if (interpolation_mode == GridSamplerInterpolation::Nearest) { + int64_t ix_nearest = static_cast(std::nearbyint(ix)); + int64_t iy_nearest = static_cast(std::nearbyint(iy)); + + // assign nearest neighor pixel value to output pixel + float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; + const float *inp_ptr_NC = inp_ptr_N; + for (int64_t c = 0; c < C; + ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) { + if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) { + *out_ptr_NCHW = + inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW]; + } else { + *out_ptr_NCHW = static_cast(0); + } + } + } else if (interpolation_mode == GridSamplerInterpolation::Bicubic) { + // grid_sampler_compute_source_index will "clip the value" of idx + // depends on the padding, + // which would cause calculation to be wrong, + // for example x = -0.1 -> ix = 0 for zero padding, but in bicubic ix + // = floor(x) = -1 + // There would be more problem in reflection padding, since the -1 and + // +1 direction is not fixed in boundary condition + ix = grid_sampler_unnormalize(x, inp_W, align_corners); + iy = grid_sampler_unnormalize(y, inp_H, align_corners); + + float ix_nw = std::floor(ix); + float iy_nw = std::floor(iy); + + const float tx = ix - ix_nw; + const float ty = iy - iy_nw; + + const float *inp_ptr_NC = inp_ptr_N; + float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; + for (int64_t c = 0; c < C; + ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) { + float coefficients[4]; + + // Interpolate 4 values in the x directon + for (int64_t i = 0; i < 4; ++i) { + coefficients[i] = cubic_interp1d( + get_value_bounded(inp_ptr_NC, ix_nw - 1, iy_nw - 1 + i, + inp_W, inp_H, inp_sW, inp_sH, + padding_mode, align_corners), + get_value_bounded(inp_ptr_NC, ix_nw + 0, iy_nw - 1 + i, + inp_W, inp_H, inp_sW, inp_sH, + padding_mode, align_corners), + get_value_bounded(inp_ptr_NC, ix_nw + 1, iy_nw - 1 + i, + inp_W, inp_H, inp_sW, inp_sH, + padding_mode, align_corners), + get_value_bounded(inp_ptr_NC, ix_nw + 2, iy_nw - 1 + i, + inp_W, inp_H, inp_sW, inp_sH, + padding_mode, align_corners), + tx); + } + + // Interpolate in the y direction + *out_ptr_NCHW = + cubic_interp1d(coefficients[0], coefficients[1], + coefficients[2], coefficients[3], ty); + } } + } } + } } From d0f92d6d64733f82e0866d2100c614d92edabe7f Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Wed, 31 Mar 2021 12:11:41 +0800 Subject: [PATCH 04/15] update code --- tests/test_ops/test_onnx.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tests/test_ops/test_onnx.py b/tests/test_ops/test_onnx.py index 4faf1f95a2d..eb5cd320648 100644 --- a/tests/test_ops/test_onnx.py +++ b/tests/test_ops/test_onnx.py @@ -9,6 +9,8 @@ import torch import torch.nn as nn from packaging import version +from torch.onnx.symbolic_helper import parse_args +from torch.onnx.symbolic_registry import register_op onnx_file = 'tmp.onnx' @@ -30,9 +32,24 @@ def forward(self, x, y): return res +@parse_args('v', 'v', 'i', 'i', 'i') +def grid_sampler(g, + input, + grid, + interpolation_mode, + padding_mode, + align_corners=False): + return g.op( + 'mmcv::grid_sampler', + input, + grid, + interpolation_mode_i=interpolation_mode, + padding_mode_i=padding_mode, + align_corners_i=align_corners) + + def test_grid_sampler(): - from mmcv.onnx.symbolic import register_extra_symbolics - register_extra_symbolics() + register_op('grid_sampler', grid_sampler, '', 11) input = torch.ones(1, 1, 2, 2) out_h = 4 out_w = 4 From 285f0487efe0e2c1de9a2399e7cabbec0826b17d Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Wed, 31 Mar 2021 16:35:53 +0800 Subject: [PATCH 05/15] update code --- mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp | 15 +++++---- .../onnxruntime/cpu/onnxruntime_register.cpp | 8 ++--- mmcv/ops/csrc/onnxruntime/grid_sample.h | 32 +++++++++---------- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp index 458ec16e3e6..2b4582facbc 100644 --- a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp +++ b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp @@ -6,7 +6,7 @@ #define MIN(a, b) (((a) < (b)) ? (a) : (b)) #define MAX(a, b) (((a) < (b)) ? (b) : (a)) -#define CLIP_COORDINATES(in, out, clip_limit) \ +#define CLIP_COORDINATES(in, out, clip_limit) \ out = MIN((clip_limit - 1), MAX(in, 0)) GridSampleKernel::GridSampleKernel(OrtApi api, const OrtKernelInfo *info) @@ -81,8 +81,10 @@ static inline scalar_t compute_coordinates(scalar_t coord, int64_t size, // Computes the pixel source index value for a grid coordinate template static inline scalar_t -grid_sampler_compute_source_index(scalar_t coord, int64_t size, - int64_t padding_mode, bool align_corners) { +grid_sampler_compute_source_index(scalar_t coord, + int64_t size, + int64_t padding_mode, + bool align_corners) { coord = grid_sampler_unnormalize(coord, size, align_corners); coord = compute_coordinates(coord, size, padding_mode, align_corners); return coord; @@ -95,10 +97,11 @@ static inline bool within_bounds_2d(int64_t h, int64_t w, int64_t H, template static inline scalar_t -get_value_bounded(const scalar_t *data, scalar_t x, scalar_t y, int64_t W, - int64_t H, int64_t sW, int64_t sH, int64_t padding_mode, +get_value_bounded(const scalar_t *data, scalar_t x, + scalar_t y, int64_t W, int64_t H, + int64_t sW, int64_t sH, + int64_t padding_mode, bool align_corners) { - x = compute_coordinates(x, W, padding_mode, align_corners); y = compute_coordinates(y, H, padding_mode, align_corners); diff --git a/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp b/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp index 06196b8110f..257dda443d8 100644 --- a/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp +++ b/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp @@ -1,10 +1,10 @@ #include "onnxruntime_register.h" +#include "grid_sample.h" #include "nms.h" #include "ort_mmcv_utils.h" #include "roi_align.h" #include "soft_nms.h" -#include "grid_sample.h" const char *c_MMCVOpDomain = "mmcv"; SoftNmsOp c_SoftNmsOp; @@ -29,13 +29,11 @@ OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options, return status; } - if (auto status = - ortApi->CustomOpDomain_Add(domain, &c_MMCVRoiAlignCustomOp)) { + if (auto status = ortApi->CustomOpDomain_Add(domain, &c_MMCVRoiAlignCustomOp)) { return status; } - if (auto status = - ortApi->CustomOpDomain_Add(domain, &c_GridSampleOp)) { + if (auto status = ortApi->CustomOpDomain_Add(domain, &c_GridSampleOp)) { return status; } diff --git a/mmcv/ops/csrc/onnxruntime/grid_sample.h b/mmcv/ops/csrc/onnxruntime/grid_sample.h index b10555cf13d..09cf0ad8edb 100644 --- a/mmcv/ops/csrc/onnxruntime/grid_sample.h +++ b/mmcv/ops/csrc/onnxruntime/grid_sample.h @@ -8,7 +8,7 @@ struct GridSampleKernel { void Compute(OrtKernelContext *context); - protected: +protected: OrtApi api_; Ort::CustomOpApi ort_; const OrtKernelInfo *info_; @@ -20,24 +20,24 @@ struct GridSampleKernel { }; struct GridSampleOp : Ort::CustomOpBase { - void* CreateKernel(OrtApi api, const OrtKernelInfo *info) const { - return new GridSampleKernel(api, info); - }; + void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const { + return new GridSampleKernel(api, info); + }; - const char* GetName() const { return "grid_sampler"; }; + const char *GetName() const { return "grid_sampler"; }; - size_t GetInputTypeCount() const { return 2; }; - ONNXTensorElementDataType GetInputType(size_t /*index*/) const { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; - }; + size_t GetInputTypeCount() const { return 2; }; + ONNXTensorElementDataType GetInputType(size_t /*index*/) const { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + }; - size_t GetOutputTypeCount() const { return 1; }; - ONNXTensorElementDataType GetOutputType(size_t /*index*/) const { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; - }; + size_t GetOutputTypeCount() const { return 1; }; + ONNXTensorElementDataType GetOutputType(size_t /*index*/) const { + return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; + }; - const char* GetExecutionProviderType() const { - return "CPUExecutionProvider"; - }; + const char *GetExecutionProviderType() const { + return "CPUExecutionProvider"; + }; }; #endif From 7874b74ac6d73750be7633266f00245488ca3e56 Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Wed, 31 Mar 2021 16:57:39 +0800 Subject: [PATCH 06/15] update code --- mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp | 24 +++++++++----------- mmcv/ops/csrc/onnxruntime/grid_sample.h | 2 +- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp index 2b4582facbc..cdefdcc4b3a 100644 --- a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp +++ b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp @@ -1,8 +1,8 @@ -#include "grid_sample.h" +#include #include "../ort_mmcv_utils.h" +#include "grid_sample.h" -#include #define MIN(a, b) (((a) < (b)) ? (a) : (b)) #define MAX(a, b) (((a) < (b)) ? (b) : (a)) @@ -80,11 +80,10 @@ static inline scalar_t compute_coordinates(scalar_t coord, int64_t size, // Computes the pixel source index value for a grid coordinate template -static inline scalar_t -grid_sampler_compute_source_index(scalar_t coord, - int64_t size, - int64_t padding_mode, - bool align_corners) { +static inline scalar_t grid_sampler_compute_source_index(scalar_t coord, + int64_t size, + int64_t padding_mode, + bool align_corners) { coord = grid_sampler_unnormalize(coord, size, align_corners); coord = compute_coordinates(coord, size, padding_mode, align_corners); return coord; @@ -96,12 +95,11 @@ static inline bool within_bounds_2d(int64_t h, int64_t w, int64_t H, } template -static inline scalar_t -get_value_bounded(const scalar_t *data, scalar_t x, - scalar_t y, int64_t W, int64_t H, - int64_t sW, int64_t sH, - int64_t padding_mode, - bool align_corners) { +static inline scalar_t get_value_bounded(const scalar_t *data, scalar_t x, + scalar_t y, int64_t W, int64_t H, + int64_t sW, int64_t sH, + int64_t padding_mode, + bool align_corners) { x = compute_coordinates(x, W, padding_mode, align_corners); y = compute_coordinates(y, H, padding_mode, align_corners); diff --git a/mmcv/ops/csrc/onnxruntime/grid_sample.h b/mmcv/ops/csrc/onnxruntime/grid_sample.h index 09cf0ad8edb..923cf7e03ce 100644 --- a/mmcv/ops/csrc/onnxruntime/grid_sample.h +++ b/mmcv/ops/csrc/onnxruntime/grid_sample.h @@ -8,7 +8,7 @@ struct GridSampleKernel { void Compute(OrtKernelContext *context); -protected: + protected: OrtApi api_; Ort::CustomOpApi ort_; const OrtKernelInfo *info_; From 7bf5aa39c45b770e5f5a68ac64e01ee6392d0e2f Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Wed, 31 Mar 2021 17:26:11 +0800 Subject: [PATCH 07/15] update code --- tests/test_ops/test_onnx.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_ops/test_onnx.py b/tests/test_ops/test_onnx.py index eb5cd320648..5c352474b98 100644 --- a/tests/test_ops/test_onnx.py +++ b/tests/test_ops/test_onnx.py @@ -67,9 +67,11 @@ def test_grid_sampler(): from mmcv.ops import get_onnxruntime_op_path ort_custom_op_path = get_onnxruntime_op_path() + if not os.path.exists(ort_custom_op_path): + pytest.skip('nms for onnxruntime is not compiled.') + session_options = rt.SessionOptions() - if os.path.exists(ort_custom_op_path): - session_options.register_custom_ops_library(ort_custom_op_path) + session_options.register_custom_ops_library(ort_custom_op_path) sess = rt.InferenceSession(onnx_file, session_options) input_feature = input.cpu().numpy() grid_feature = grid.cpu().numpy() From 80c18ea47d652ff7a2b934e4102bc33b6d92341e Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Wed, 31 Mar 2021 19:16:30 +0800 Subject: [PATCH 08/15] update code --- mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp | 1 - tests/test_ops/test_onnx.py | 8 +++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp index cdefdcc4b3a..1a5b6715061 100644 --- a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp +++ b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp @@ -3,7 +3,6 @@ #include "../ort_mmcv_utils.h" #include "grid_sample.h" - #define MIN(a, b) (((a) < (b)) ? (a) : (b)) #define MAX(a, b) (((a) < (b)) ? (b) : (a)) #define CLIP_COORDINATES(in, out, clip_limit) \ diff --git a/tests/test_ops/test_onnx.py b/tests/test_ops/test_onnx.py index 5c352474b98..45c678d8d77 100644 --- a/tests/test_ops/test_onnx.py +++ b/tests/test_ops/test_onnx.py @@ -61,7 +61,13 @@ def test_grid_sampler(): grid = grid.unsqueeze(0).repeat(1, 1, 1, 1) model = GridSample() - torch.onnx.export(model, (input, grid), onnx_file, opset_version=11) + with torch.no_grad(): + torch.onnx.export( + model, (input, grid), + onnx_file, + export_params=True, + keep_initializers_as_inputs=True, + opset_version=11) pytorch_output = model(input, grid) From 121962544e609985d941c14a822d5d9b9e8c68ab Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Wed, 31 Mar 2021 19:27:38 +0800 Subject: [PATCH 09/15] update code --- tests/test_ops/test_onnx.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_ops/test_onnx.py b/tests/test_ops/test_onnx.py index 45c678d8d77..4f46eaea659 100644 --- a/tests/test_ops/test_onnx.py +++ b/tests/test_ops/test_onnx.py @@ -60,6 +60,11 @@ def test_grid_sampler(): grid[:, :, 1] = h.unsqueeze(0).repeat(out_w, 1).transpose(0, 1) grid = grid.unsqueeze(0).repeat(1, 1, 1, 1) + from mmcv.ops import get_onnxruntime_op_path + ort_custom_op_path = get_onnxruntime_op_path() + if not os.path.exists(ort_custom_op_path): + pytest.skip('nms for onnxruntime is not compiled.') + model = GridSample() with torch.no_grad(): torch.onnx.export( @@ -71,11 +76,6 @@ def test_grid_sampler(): pytorch_output = model(input, grid) - from mmcv.ops import get_onnxruntime_op_path - ort_custom_op_path = get_onnxruntime_op_path() - if not os.path.exists(ort_custom_op_path): - pytest.skip('nms for onnxruntime is not compiled.') - session_options = rt.SessionOptions() session_options.register_custom_ops_library(ort_custom_op_path) sess = rt.InferenceSession(onnx_file, session_options) From 36e98a1e1af9c15855fee2c96dfa8b11872333a0 Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Wed, 31 Mar 2021 19:34:20 +0800 Subject: [PATCH 10/15] update code --- mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp b/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp index 257dda443d8..cd65412a52c 100644 --- a/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp +++ b/mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp @@ -29,7 +29,8 @@ OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options, return status; } - if (auto status = ortApi->CustomOpDomain_Add(domain, &c_MMCVRoiAlignCustomOp)) { + if (auto status = + ortApi->CustomOpDomain_Add(domain, &c_MMCVRoiAlignCustomOp)) { return status; } From 3c9037f422757d9b0a913f3a871b80346fffd167 Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Thu, 1 Apr 2021 16:40:51 +0800 Subject: [PATCH 11/15] update code --- mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp | 3 + tests/test_ops/test_onnx.py | 91 +++++++++----------- 2 files changed, 46 insertions(+), 48 deletions(-) diff --git a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp index 1a5b6715061..c60c1022211 100644 --- a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp +++ b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp @@ -145,6 +145,9 @@ static inline scalar_t cubic_interp1d(scalar_t x0, scalar_t x1, scalar_t x2, return x0 * coeffs[0] + x1 * coeffs[1] + x2 * coeffs[2] + x3 * coeffs[3]; } +// modified from +// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/GridSampler.cpp + void GridSampleKernel::Compute(OrtKernelContext *context) { const bool align_corners = align_corners_; const int64_t padding_mode = padding_mode_; diff --git a/tests/test_ops/test_onnx.py b/tests/test_ops/test_onnx.py index 4f46eaea659..9a04f82e8c5 100644 --- a/tests/test_ops/test_onnx.py +++ b/tests/test_ops/test_onnx.py @@ -9,8 +9,6 @@ import torch import torch.nn as nn from packaging import version -from torch.onnx.symbolic_helper import parse_args -from torch.onnx.symbolic_registry import register_op onnx_file = 'tmp.onnx' @@ -25,31 +23,7 @@ def forward(self, *args, **kwargs): return self.wrapped_function(*args, **kwargs) -class GridSample(torch.nn.Module): - - def forward(self, x, y): - res = torch.nn.functional.grid_sample(x, grid=y, align_corners=False) - return res - - -@parse_args('v', 'v', 'i', 'i', 'i') -def grid_sampler(g, - input, - grid, - interpolation_mode, - padding_mode, - align_corners=False): - return g.op( - 'mmcv::grid_sampler', - input, - grid, - interpolation_mode_i=interpolation_mode, - padding_mode_i=padding_mode, - align_corners_i=align_corners) - - def test_grid_sampler(): - register_op('grid_sampler', grid_sampler, '', 11) input = torch.ones(1, 1, 2, 2) out_h = 4 out_w = 4 @@ -59,31 +33,52 @@ def test_grid_sampler(): grid[:, :, 0] = w.unsqueeze(0).repeat(out_h, 1) grid[:, :, 1] = h.unsqueeze(0).repeat(out_w, 1).transpose(0, 1) grid = grid.unsqueeze(0).repeat(1, 1, 1, 1) - from mmcv.ops import get_onnxruntime_op_path ort_custom_op_path = get_onnxruntime_op_path() if not os.path.exists(ort_custom_op_path): - pytest.skip('nms for onnxruntime is not compiled.') - - model = GridSample() - with torch.no_grad(): - torch.onnx.export( - model, (input, grid), - onnx_file, - export_params=True, - keep_initializers_as_inputs=True, - opset_version=11) - - pytorch_output = model(input, grid) - - session_options = rt.SessionOptions() - session_options.register_custom_ops_library(ort_custom_op_path) - sess = rt.InferenceSession(onnx_file, session_options) - input_feature = input.cpu().numpy() - grid_feature = grid.cpu().numpy() - onnx_output = sess.run(None, {'x': input_feature, 'y': grid_feature}) - os.remove(onnx_file) - assert np.allclose(pytorch_output, onnx_output, atol=1e-3) + pytest.skip('grid_sample for onnxruntime is not compiled.') + + inter_modes = ['bilinear', 'nearest'] + padding_modes = ['zeros', 'border', 'reflection'] + corners = [False, True] + for inter_mode in inter_modes: + for padding_mode in padding_modes: + for align_corner in corners: + pytorch_output = \ + torch.nn.functional.grid_sample(input, + grid, + mode=inter_mode, + padding_mode=padding_mode, + align_corners=align_corner) + wrapped_model = WrapFunction(torch.nn.functional.grid_sample) + wrapped_model.cpu().eval() + + from mmcv.onnx.symbolic import register_extra_symbolics + opset_version = 11 + register_extra_symbolics(opset_version) + with torch.no_grad(): + torch.onnx.export( + wrapped_model, (input, grid), + onnx_file, + export_params=True, + keep_initializers_as_inputs=True, + input_names=['input', 'grid'], + opset_version=11) + + session_options = rt.SessionOptions() + session_options.register_custom_ops_library(ort_custom_op_path) + sess = rt.InferenceSession(onnx_file, session_options) + input_feature = input.cpu().numpy() + grid_feature = grid.cpu().numpy() + onnx_output = sess.run(None, { + 'input': input_feature, + 'grid': grid_feature + }) + os.remove(onnx_file) + assert np.allclose(pytorch_output, onnx_output, atol=1e-3) + + +test_grid_sampler() def test_nms(): From 4d8d95222221806dddc1b8b78bc714448e6e03c4 Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Thu, 1 Apr 2021 16:59:22 +0800 Subject: [PATCH 12/15] update code --- mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp | 6 +- tests/test_ops/test_onnx.py | 107 ++++++++++--------- 2 files changed, 57 insertions(+), 56 deletions(-) diff --git a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp index c60c1022211..ec5ad330f9b 100644 --- a/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp +++ b/mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp @@ -8,6 +8,9 @@ #define CLIP_COORDINATES(in, out, clip_limit) \ out = MIN((clip_limit - 1), MAX(in, 0)) +// modified from +// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/GridSampler.cpp + GridSampleKernel::GridSampleKernel(OrtApi api, const OrtKernelInfo *info) : api_(api), ort_(api_), info_(info) { align_corners_ = ort_.KernelInfoGetAttribute(info, "align_corners"); @@ -145,9 +148,6 @@ static inline scalar_t cubic_interp1d(scalar_t x0, scalar_t x1, scalar_t x2, return x0 * coeffs[0] + x1 * coeffs[1] + x2 * coeffs[2] + x3 * coeffs[3]; } -// modified from -// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/GridSampler.cpp - void GridSampleKernel::Compute(OrtKernelContext *context) { const bool align_corners = align_corners_; const int64_t padding_mode = padding_mode_; diff --git a/tests/test_ops/test_onnx.py b/tests/test_ops/test_onnx.py index 9a04f82e8c5..6a8de8a5213 100644 --- a/tests/test_ops/test_onnx.py +++ b/tests/test_ops/test_onnx.py @@ -23,62 +23,63 @@ def forward(self, *args, **kwargs): return self.wrapped_function(*args, **kwargs) -def test_grid_sampler(): - input = torch.ones(1, 1, 2, 2) - out_h = 4 - out_w = 4 - h = torch.linspace(-1, 1, out_h) - w = torch.linspace(-1, 1, out_w) - grid = torch.zeros(out_h, out_w, 2) - grid[:, :, 0] = w.unsqueeze(0).repeat(out_h, 1) - grid[:, :, 1] = h.unsqueeze(0).repeat(out_w, 1).transpose(0, 1) - grid = grid.unsqueeze(0).repeat(1, 1, 1, 1) +@pytest.mark.parametrize('mode', ['bilinear', 'nearest']) +@pytest.mark.parametrize('padding_mode', ['zeros', 'border', 'reflection']) +@pytest.mark.parametrize('align_corners', [True, False]) +def test_grid_sample(mode, padding_mode, align_corners): + from mmcv.onnx.symbolic import register_extra_symbolics + opset_version = 11 + register_extra_symbolics(opset_version) + + input = torch.rand(1, 1, 10, 10) + grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]]) + grid = nn.functional.affine_grid(grid, (1, 1, 15, 15)).type_as(input) + + def func(input, grid): + return nn.functional.grid_sample( + input, + grid, + mode=mode, + padding_mode=padding_mode, + align_corners=align_corners) + + wrapped_model = WrapFunction(func).eval() + + input_names = ['input', 'grid'] + output_names = ['output'] + + with torch.no_grad(): + torch.onnx.export( + wrapped_model, (input.clone(), grid.clone()), + onnx_file, + export_params=True, + keep_initializers_as_inputs=True, + input_names=input_names, + output_names=output_names, + opset_version=11) + + onnx_model = onnx.load(onnx_file) + from mmcv.ops import get_onnxruntime_op_path ort_custom_op_path = get_onnxruntime_op_path() if not os.path.exists(ort_custom_op_path): - pytest.skip('grid_sample for onnxruntime is not compiled.') - - inter_modes = ['bilinear', 'nearest'] - padding_modes = ['zeros', 'border', 'reflection'] - corners = [False, True] - for inter_mode in inter_modes: - for padding_mode in padding_modes: - for align_corner in corners: - pytorch_output = \ - torch.nn.functional.grid_sample(input, - grid, - mode=inter_mode, - padding_mode=padding_mode, - align_corners=align_corner) - wrapped_model = WrapFunction(torch.nn.functional.grid_sample) - wrapped_model.cpu().eval() - - from mmcv.onnx.symbolic import register_extra_symbolics - opset_version = 11 - register_extra_symbolics(opset_version) - with torch.no_grad(): - torch.onnx.export( - wrapped_model, (input, grid), - onnx_file, - export_params=True, - keep_initializers_as_inputs=True, - input_names=['input', 'grid'], - opset_version=11) - - session_options = rt.SessionOptions() - session_options.register_custom_ops_library(ort_custom_op_path) - sess = rt.InferenceSession(onnx_file, session_options) - input_feature = input.cpu().numpy() - grid_feature = grid.cpu().numpy() - onnx_output = sess.run(None, { - 'input': input_feature, - 'grid': grid_feature - }) - os.remove(onnx_file) - assert np.allclose(pytorch_output, onnx_output, atol=1e-3) - - -test_grid_sampler() + pytest.skip('nms for onnxruntime is not compiled.') + + session_options = rt.SessionOptions() + session_options.register_custom_ops_library(ort_custom_op_path) + + # get onnx output + input_all = [node.name for node in onnx_model.graph.input] + input_initializer = [node.name for node in onnx_model.graph.initializer] + net_feed_input = list(set(input_all) - set(input_initializer)) + assert (len(net_feed_input) == 2) + sess = rt.InferenceSession(onnx_file, session_options) + ort_result = sess.run(None, { + 'input': input.detach().numpy(), + 'grid': grid.detach().numpy() + }) + pytorch_results = wrapped_model(input.clone(), grid.clone()) + assert np.allclose(pytorch_results, ort_result, atol=1e-3) def test_nms(): From 3dece937a2fda428c231c2a5849498d63ee3ce50 Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Thu, 1 Apr 2021 20:06:39 +0800 Subject: [PATCH 13/15] update code --- tests/test_ops/test_onnx.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_ops/test_onnx.py b/tests/test_ops/test_onnx.py index 6a8de8a5213..b7a8d559fad 100644 --- a/tests/test_ops/test_onnx.py +++ b/tests/test_ops/test_onnx.py @@ -31,6 +31,11 @@ def test_grid_sample(mode, padding_mode, align_corners): opset_version = 11 register_extra_symbolics(opset_version) + from mmcv.ops import get_onnxruntime_op_path + ort_custom_op_path = get_onnxruntime_op_path() + if not os.path.exists(ort_custom_op_path): + pytest.skip('nms for onnxruntime is not compiled.') + input = torch.rand(1, 1, 10, 10) grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]]) grid = nn.functional.affine_grid(grid, (1, 1, 15, 15)).type_as(input) @@ -60,11 +65,6 @@ def func(input, grid): onnx_model = onnx.load(onnx_file) - from mmcv.ops import get_onnxruntime_op_path - ort_custom_op_path = get_onnxruntime_op_path() - if not os.path.exists(ort_custom_op_path): - pytest.skip('nms for onnxruntime is not compiled.') - session_options = rt.SessionOptions() session_options.register_custom_ops_library(ort_custom_op_path) From 16b1c858b33bea29e8fd538feded57718a3fbe24 Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Fri, 2 Apr 2021 13:28:37 +0800 Subject: [PATCH 14/15] update code --- tests/test_ops/test_onnx.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_ops/test_onnx.py b/tests/test_ops/test_onnx.py index b7a8d559fad..a4939177497 100644 --- a/tests/test_ops/test_onnx.py +++ b/tests/test_ops/test_onnx.py @@ -79,6 +79,7 @@ def func(input, grid): 'grid': grid.detach().numpy() }) pytorch_results = wrapped_model(input.clone(), grid.clone()) + os.remove(onnx_file) assert np.allclose(pytorch_results, ort_result, atol=1e-3) From 3f64a84b91ff0875e323d45e94e4a035b8151685 Mon Sep 17 00:00:00 2001 From: tangyanfei Date: Tue, 6 Apr 2021 17:28:39 +0800 Subject: [PATCH 15/15] update code --- tests/test_ops/test_onnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_ops/test_onnx.py b/tests/test_ops/test_onnx.py index a4939177497..487edbbf9da 100644 --- a/tests/test_ops/test_onnx.py +++ b/tests/test_ops/test_onnx.py @@ -34,7 +34,7 @@ def test_grid_sample(mode, padding_mode, align_corners): from mmcv.ops import get_onnxruntime_op_path ort_custom_op_path = get_onnxruntime_op_path() if not os.path.exists(ort_custom_op_path): - pytest.skip('nms for onnxruntime is not compiled.') + pytest.skip('custom ops for onnxruntime are not compiled.') input = torch.rand(1, 1, 10, 10) grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]]) @@ -55,7 +55,7 @@ def func(input, grid): with torch.no_grad(): torch.onnx.export( - wrapped_model, (input.clone(), grid.clone()), + wrapped_model, (input, grid), onnx_file, export_params=True, keep_initializers_as_inputs=True,