|
5 | 5 | #include <THC/THCAtomics.cuh> |
6 | 6 | #include <THC/THCDeviceUtils.cuh> |
7 | 7 |
|
8 | | -// TODO make it in a common file |
9 | | -#define CUDA_1D_KERNEL_LOOP(i, n) \ |
10 | | - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ |
11 | | - i += blockDim.x * gridDim.x) |
| 8 | +#include "cuda_helpers.h" |
12 | 9 |
|
13 | 10 |
|
14 | 11 | template <typename T> |
@@ -48,7 +45,7 @@ __device__ T bilinear_interpolate(const T* input, |
48 | 45 | T ly = y - y_low; |
49 | 46 | T lx = x - x_low; |
50 | 47 | T hy = 1. - ly, hx = 1. - lx; |
51 | | - |
| 48 | + |
52 | 49 | // do bilinear interpolation |
53 | 50 | T v1 = input[y_low * width + x_low]; |
54 | 51 | T v2 = input[y_low * width + x_high]; |
@@ -171,7 +168,7 @@ __device__ void bilinear_interpolate_gradient( |
171 | 168 | } |
172 | 169 |
|
173 | 170 | template <typename T> |
174 | | -__global__ void RoIAlignBackwardFeature(const int nthreads, const T* grad_output, |
| 171 | +__global__ void RoIAlignBackward(const int nthreads, const T* grad_output, |
175 | 172 | const int num_rois, const T spatial_scale, |
176 | 173 | const int channels, const int height, const int width, |
177 | 174 | const int pooled_height, const int pooled_width, |
@@ -327,7 +324,7 @@ at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, |
327 | 324 | int w_stride = grad.stride(3); |
328 | 325 |
|
329 | 326 | AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIAlign_backward", [&] { |
330 | | - RoIAlignBackwardFeature<scalar_t><<<grid, block, 0, stream>>>( |
| 327 | + RoIAlignBackward<scalar_t><<<grid, block, 0, stream>>>( |
331 | 328 | grad.numel(), |
332 | 329 | grad.data<scalar_t>(), |
333 | 330 | num_rois, |
|
0 commit comments