From aa83a1bec3cf11b1202089e488c50501403b961d Mon Sep 17 00:00:00 2001 From: haohongxiang Date: Mon, 12 Jul 2021 15:06:12 +0800 Subject: [PATCH 1/2] Fix gather_op by adding OurOfRangeCheck for param[Index], test=develop --- paddle/fluid/operators/gather.cu.h | 36 ++++++++++++++++++++++++++---- paddle/fluid/operators/gather.h | 22 +++++++++++++++++- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/gather.cu.h b/paddle/fluid/operators/gather.cu.h index 95cb428abdf34..2647ecff936b3 100644 --- a/paddle/fluid/operators/gather.cu.h +++ b/paddle/fluid/operators/gather.cu.h @@ -30,13 +30,21 @@ using platform::DeviceContext; template __global__ void GatherCUDAKernel(const T* params, const IndexT* indices, - T* output, size_t index_size, - size_t slice_size) { + T* output, size_t input_size, + size_t index_size, size_t slice_size, + size_t end_size) { CUDA_KERNEL_LOOP(i, index_size * slice_size) { int indices_i = i / slice_size; int slice_i = i - indices_i * slice_size; // offset inside the slice IndexT gather_i = indices[indices_i]; IndexT params_i = gather_i * slice_size + slice_i; + PADDLE_ENFORCE( + gather_i >= 0 && gather_i < input_size, + "The index is out of bounds, " + "please check whether the dimensions of index and " + "input meet the requirements. It should " + "be less than [%d] and greater than or equal to 0, but received [%d]", + input_size, gather_i); *(output + i) = *(params + params_i); } } @@ -58,7 +66,7 @@ __global__ void GatherNdCUDAKernel(const T* input, const int* input_dims, "The index is out of bounds, " "please check whether the dimensions of index and " "input meet the requirements. It should " - "be less than [%d] and greater or equal to 0, but received [%d]", + "be less than [%d] and greater than or equal to 0, but received [%d]", input_dims[j], index_value); gather_i += (index_value * temp); temp *= input_dims[j]; @@ -91,15 +99,26 @@ void GPUGather(const platform::DeviceContext& ctx, const Tensor& src, " the second dimension should be 1.")); } + const auto gplace = BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()); + auto cplace = platform::CPUPlace(); + auto index_dims = index.dims(); + auto index_dims_size = index_dims.size(); + auto input_dims = src.dims(); + auto input_dims_size = input_dims.size(); + int index_size = index.dims()[0]; auto src_dims = src.dims(); framework::DDim output_dims(src_dims); output_dims[0] = index_size; + // final dim + int64_t end_size = index_dims[index_dims_size - 1]; // slice size int slice_size = 1; for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + // input size + int input_size = src_dims[0] * slice_size; const T* p_src = src.data(); const IndexT* p_index = index.data(); @@ -112,7 +131,7 @@ void GPUGather(const platform::DeviceContext& ctx, const Tensor& src, GatherCUDAKernel<<< grid, block, 0, reinterpret_cast(ctx).stream()>>>( - p_src, p_index, p_output, index_size, slice_size); + p_src, p_index, p_output, input_size, index_size, slice_size, end_size); } template @@ -177,6 +196,15 @@ __global__ void GatherGPUKernel(const T* input, const U* index, T* out, int next_idx = idx - outer_size * inner_dim_index; int index_dim_index = next_idx / outer_dim_size; int index_val = index[index_dim_index]; + + PADDLE_ENFORCE( + index_val >= 0 && index_val < input_index_dim_size, + "The index is out of bounds, " + "please check whether the dimensions of index and " + "input meet the requirements. It should " + "be less than [%d] and greater than or equal to 0, but received [%d]", + input_index_dim_size, index_val); + int out_dim_index = next_idx - outer_dim_size * index_dim_index; int input_index = inner_dim_index * (outer_dim_size * input_index_dim_size) + diff --git a/paddle/fluid/operators/gather.h b/paddle/fluid/operators/gather.h index 8deab709220d7..b7fa4022882b7 100644 --- a/paddle/fluid/operators/gather.h +++ b/paddle/fluid/operators/gather.h @@ -67,11 +67,25 @@ void CPUGather(const platform::DeviceContext& ctx, const Tensor& src, // slice size int slice_size = 1; for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + // input size + int input_size = src_dims[0] * slice_size; const size_t slice_bytes = slice_size * sizeof(T); for (int64_t i = 0; i < index_size; ++i) { IndexT index_ = p_index[i]; + PADDLE_ENFORCE_LT(p_index[i], input_size, + platform::errors::OutOfRange( + "The element of Index must be less than the size of " + "input dim size of axis which is %d, but received " + "index element which is %d in the %d index.", + input_size, p_index[i], i)); + PADDLE_ENFORCE_GE(p_index[i], 0UL, + platform::errors::OutOfRange( + "The element of Index must be greater than or equal " + "to 0, but received index element which is %d in the " + "%d index.", + p_index[i], i)); memcpy(p_output + i * slice_size, p_src + index_ * slice_size, slice_bytes); } } @@ -141,11 +155,17 @@ void GatherV2Function(const Tensor* input, const Tensor* index, int axis, int input_index_dim_size = input_dim[axis_index]; for (int i = 0; i < index_size; i++) { PADDLE_ENFORCE_LT(index_data[i], input_index_dim_size, - platform::errors::InvalidArgument( + platform::errors::OutOfRange( "The element of Index must be less than the size of " "input dim size of axis which is %d, but received " "index element which is %d in the %d index.", input_index_dim_size, index_data[i], i)); + PADDLE_ENFORCE_GE(index_data[i], 0UL, + platform::errors::OutOfRange( + "The element of Index must be greater than or equal " + "to 0, but received index element which is %d in the " + "%d index.", + index_data[i], i)); } int inner_dim_size = 1; From 3bba806f469e7a35868ed10733429d3c14e2c169 Mon Sep 17 00:00:00 2001 From: haohongxiang Date: Tue, 13 Jul 2021 10:40:33 +0800 Subject: [PATCH 2/2] Code Optimization, test=develop --- paddle/fluid/operators/gather.cu.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/operators/gather.cu.h b/paddle/fluid/operators/gather.cu.h index 2647ecff936b3..6469307bc5652 100644 --- a/paddle/fluid/operators/gather.cu.h +++ b/paddle/fluid/operators/gather.cu.h @@ -31,8 +31,7 @@ using platform::DeviceContext; template __global__ void GatherCUDAKernel(const T* params, const IndexT* indices, T* output, size_t input_size, - size_t index_size, size_t slice_size, - size_t end_size) { + size_t index_size, size_t slice_size) { CUDA_KERNEL_LOOP(i, index_size * slice_size) { int indices_i = i / slice_size; int slice_i = i - indices_i * slice_size; // offset inside the slice @@ -99,21 +98,13 @@ void GPUGather(const platform::DeviceContext& ctx, const Tensor& src, " the second dimension should be 1.")); } - const auto gplace = BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()); - auto cplace = platform::CPUPlace(); - auto index_dims = index.dims(); - auto index_dims_size = index_dims.size(); - auto input_dims = src.dims(); - auto input_dims_size = input_dims.size(); - + // index size int index_size = index.dims()[0]; auto src_dims = src.dims(); framework::DDim output_dims(src_dims); output_dims[0] = index_size; - // final dim - int64_t end_size = index_dims[index_dims_size - 1]; // slice size int slice_size = 1; for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; @@ -131,7 +122,7 @@ void GPUGather(const platform::DeviceContext& ctx, const Tensor& src, GatherCUDAKernel<<< grid, block, 0, reinterpret_cast(ctx).stream()>>>( - p_src, p_index, p_output, input_size, index_size, slice_size, end_size); + p_src, p_index, p_output, input_size, index_size, slice_size); } template