Skip to content

Commit

Permalink
Merge pull request PaddlePaddle#8 from FDInSky/farthest_point_samplin…
Browse files Browse the repository at this point in the history
…g_op

add farthest point sampling op
  • Loading branch information
heavengate authored Aug 1, 2019
2 parents 5fd08fc + 253e1c6 commit 3d22ce1
Show file tree
Hide file tree
Showing 7 changed files with 316 additions and 2 deletions.
2 changes: 1 addition & 1 deletion cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ function(op_library TARGET)
"tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op"
"fusion_transpose_flatten_concat_op" "fusion_conv_inception_op"
"sync_batch_norm_op" "deformable_conv_op" "dgc_op" "three_nn_op"
"three_interp_op" "gather_point_op" "query_ball_op" "group_points_op")
"three_interp_op" "gather_point_op" "query_ball_op" "group_points_op" "farthest_point_sampling_op")
if ("${TARGET}" STREQUAL "${manual_pybind_op}")
set(pybind_flag 1)
endif()
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/API.spec
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ paddle.fluid.layers.unfold (ArgSpec(args=['x', 'kernel_sizes', 'strides', 'paddi
paddle.fluid.layers.deformable_roi_pooling (ArgSpec(args=['input', 'rois', 'trans', 'no_trans', 'spatial_scale', 'group_size', 'pooled_height', 'pooled_width', 'part_size', 'sample_per_part', 'trans_std', 'position_sensitive', 'name'], varargs=None, keywords=None, defaults=(False, 1.0, [1, 1], 1, 1, None, 1, 0.1, False, None)), ('document', '99c03e3f249e36854f87dedaa17c8f35'))
paddle.fluid.layers.shard_index (ArgSpec(args=['input', 'index_num', 'nshards', 'shard_id', 'ignore_value'], varargs=None, keywords=None, defaults=(-1,)), ('document', '5786fdbba6753ecd6cbce5e6b0889924'))
paddle.fluid.layers.query_ball (ArgSpec(args=['input', 'new_points', 'radius', 'n_sample'], varargs=None, keywords=None, defaults=None), ('document', 'c9253ae63bc409656f93050a93e7a298'))
paddle.fluid.layers.farthest_point_sampling (ArgSpec(args=['input', 'sampled_point_num'], varargs=None, keywords=None, defaults=None), ('document', '8fc4551813372802eaad905cff72b7c4'))
paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '9d7806e31bdf727c1a23b8782a09b545'))
paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'cccb6eb5410c822e5307c947aca2c899'))
paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', '32181f6037e387fb6e68a5beaafe33b6'))
Expand Down
4 changes: 3 additions & 1 deletion paddle/fluid/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ endif()

register_operators(EXCLUDES py_func_op warpctc_op dgc_op conv_fusion_op
sync_batch_norm_op deformable_conv_op three_nn_op three_interp_op
gather_point_op query_ball_op group_points_op
gather_point_op query_ball_op group_points_op farthest_point_sampling_op
DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})

if (WITH_GPU)
Expand Down Expand Up @@ -81,6 +81,8 @@ if (WITH_GPU)
file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(gather_point);\n")
op_library(query_ball_op)
file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(query_ball);\n")
op_library(farthest_point_sampling_op)
file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(farthest_point_sampling);\n")
else()
op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
endif()
Expand Down
68 changes: 68 additions & 0 deletions paddle/fluid/operators/farthest_point_sampling_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

class FarthestPointSamplingOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"(Tensor)input point cloud dataset with shape (B, N, 3)"
"B is batch size, N is points's nums, 3 is (x,y,z) coordinate");
AddOutput("Output",
"(Tensor)return sampled points with shape (B, M)"
"B is batch size, M is points's nums");
AddAttr<int>("sampled_point_num", "sampling points's num")
.SetDefault(0)
.EqualGreaterThan(0);
AddComment(
R"Doc(
Sampling point based on
its max eucliden distance with other points.)Doc");
}
};

class FarthestPointSamplingOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

protected:
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) shoud not be null");
auto x_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE(x_dims.size() == 3,
"Input(X) of FathestPointSamplingOp should be 3-D Tensor");
const int m = ctx->Attrs().Get<int>("sampled_point_num");
ctx->SetOutputDim("Output", {x_dims[0], m});
}

protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
auto input_data_type = ctx.Input<Tensor>("X")->type();
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(farthest_point_sampling, ops::FarthestPointSamplingOp,
ops::FarthestPointSamplingOpMaker);
146 changes: 146 additions & 0 deletions paddle/fluid/operators/farthest_point_sampling_op.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

template <typename T, unsigned int block_size>
__global__ void farthestpointsamplingKernel(int b, int n, int m,
const T *__restrict__ dataset,
T *__restrict__ temp,
int *__restrict__ idxs) {
// 1. add first point
// 2. add the point having farthest distance with first point's
// 3. make second point as first point, repeat 1,2
if (m <= 0) return;
const int BlockSize = block_size;
__shared__ float dists[BlockSize];
__shared__ int dists_i[BlockSize];
const int BufferSize = 3072;
__shared__ float buf[BufferSize * 3];

// one block one batch, n points
// one thread one point
for (int i = blockIdx.x; i < b; i += gridDim.x) {
// can select old point as first point randomly
int old = 0;
if (threadIdx.x == 0) idxs[i * m + 0] = old;

for (int j = threadIdx.x; j < n; j += blockDim.x) {
temp[blockIdx.x * n + j] = 1e38;
}
for (int j = threadIdx.x; j < min(BufferSize, n) * 3; j += blockDim.x) {
buf[j] = dataset[i * n * 3 + j];
}
// wait all threads do this in the same block
__syncthreads();

// out m points
for (int j = 1; j < m; j++) {
// Step 1.
// fatherest distance
int besti = 0;
float best = -1;
// first point in m points
float x1 = dataset[i * n * 3 + old * 3 + 0];
float y1 = dataset[i * n * 3 + old * 3 + 1];
float z1 = dataset[i * n * 3 + old * 3 + 2];

// Step 2.
// find farthest point of (x1, y1, z1)
for (int k = threadIdx.x; k < n; k += blockDim.x) {
float td = temp[blockIdx.x * n + k];
float x2, y2, z2;
if (k < BufferSize) {
x2 = buf[k * 3 + 0];
y2 = buf[k * 3 + 1];
z2 = buf[k * 3 + 2];
} else {
x2 = dataset[i * n * 3 + k * 3 + 0];
y2 = dataset[i * n * 3 + k * 3 + 1];
z2 = dataset[i * n * 3 + k * 3 + 2];
}
// compute eucliden distance
float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) +
(z2 - z1) * (z2 - z1);
float d2 = min(d, td);
if (d2 != td) temp[blockIdx.x * n + k] = d2;
if (d2 > best) {
best = d2;
besti = k;
}
}

// step 3.
dists[threadIdx.x] = best;
dists_i[threadIdx.x] = besti;
for (int u = 0; (1 << u) < blockDim.x; u++) {
__syncthreads();
if (threadIdx.x < (blockDim.x >> (u + 1))) {
int i1 = (threadIdx.x * 2) << u;
int i2 = (threadIdx.x * 2 + 1) << u;
if (dists[i1] < dists[i2]) {
dists[i1] = dists[i2];
dists_i[i1] = dists_i[i2];
}
}
}
__syncthreads();
// store the found node index
old = dists_i[0];
if (threadIdx.x == 0) idxs[i * m + j] = old;
}
}
}

template <typename T>
class FarthestPointSamplingOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"This kernel only runs on GPU device.");
auto *input = ctx.Input<Tensor>("X");
auto *output = ctx.Output<Tensor>("Output");
if (input->numel() == 0) return;
// allocate memory
auto *ptr_out_points_index = output->mutable_data<int>(ctx.GetPlace());

// b, n, m
int batch_size = input->dims()[0];
int in_n_points = input->dims()[1];
int out_m_points = ctx.Attr<int>("sampled_point_num");

const T *ptr_in_points = input->data<T>();

Tensor tmp;
auto *ptr_tmp_e =
tmp.mutable_data<T>({batch_size, in_n_points}, ctx.GetPlace());

// run fathest point sampling kernel
// P40 have max 512 thread
farthestpointsamplingKernel<T, 512><<<32, 512>>>(
batch_size, in_n_points, out_m_points, ptr_in_points, ptr_tmp_e,
ptr_out_points_index);
}
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(farthest_point_sampling,
ops::FarthestPointSamplingOpCUDAKernel<float>,
ops::FarthestPointSamplingOpCUDAKernel<double>);
33 changes: 33 additions & 0 deletions python/paddle/fluid/layers/nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@
'deformable_roi_pooling',
'shard_index',
'query_ball',
'farthest_point_sampling',
]

kIgnoreIndex = -100
Expand Down Expand Up @@ -12871,3 +12872,35 @@ def query_ball(input, new_points, radius, n_sample):
"Radius": radius},
outputs={"Output": out})
return out


def farthest_point_sampling(input, sampled_point_num):
'''
Sampling point based on its max eucliden distance with other points.
Args:
input (Variable): input point cloud dataset with shape (B, N, 3)
B is batch size, N is points's nums, 3 is (x,y,z) coordinate
sampled_point_num (int): sampled points's nums
Retrun:
output (Variable): return sampled points with shape (B, M)
B is batch size, M is points's nums
Examples:
.. code-block:: python
x = fluid.layers.data(name='data', shape=(2,100,3), dtype='float32')
sampled_points = fluid.layers.farthest_point_sampling(
x, 50
)
'''

helper = LayerHelper('farthest_point_sampling', **locals())
dtype = helper.input_type()
op_out = helper.create_variable_for_type_inference(dtype)
helper.append_op(
type='farthest_point_sampling',
inputs={'X': input},
outputs={'Output': op_out},
attrs={'sampled_point_num': sampled_point_num})
return op_out
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid.core as core


def farthest_point_sampling(xyz, npoint):
B, N, C = xyz.shape
S = npoint

centroids = np.zeros((B, S))
distance = np.ones((B, N)) * 1e10
farthest = 0
batch_indices = np.arange(B).astype('int32')
for i in range(S):
centroids[:, i] = farthest
centroid = xyz[batch_indices, farthest, :].reshape((B, 1, 3))
dist = np.sum((xyz - centroid)**2, -1)
mask = dist < distance
distance[mask] = dist[mask]
farthest = np.argmax(distance, -1)
return centroids.astype('int32')


class TestFarthestPointSamplingOp(OpTest):
def setUp(self):
self.op_type = 'farthest_point_sampling'
self.config()
x = np.random.randint(1, 100,
(self.x_shape[0] * self.x_shape[1] *
3, )).reshape(self.x_shape).astype(self.x_type)
m = self.sampled_point_num
out_np = farthest_point_sampling(x, m)
self.inputs = {'X': x, }
self.attrs = {'sampled_point_num': m, }
self.outputs = {'Output': out_np, }

def config(self):
self.x_shape = (1, 512, 3)
self.x_type = 'float32'
self.sampled_point_num = 256

def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=1e-3)


if __name__ == "__main__":
unittest.main()

0 comments on commit 3d22ce1

Please sign in to comment.