From 13c4acede87a8ea33d8175398aad8b40b75a30e1 Mon Sep 17 00:00:00 2001 From: Robert Huitl Date: Fri, 15 Mar 2013 16:34:50 +0100 Subject: [PATCH] Implementation of an OpenMP-parallelized version of Moving Least Squares --- surface/include/pcl/surface/impl/mls.hpp | 110 ++++++++++++++++++--- surface/include/pcl/surface/mls.h | 83 ++++++++++++++-- surface/src/mls.cpp | 3 + test/surface/test_moving_least_squares.cpp | 4 +- 4 files changed, 178 insertions(+), 22 deletions(-) diff --git a/surface/include/pcl/surface/impl/mls.hpp b/surface/include/pcl/surface/impl/mls.hpp index b66614892c3..cef084cb296 100644 --- a/surface/include/pcl/surface/impl/mls.hpp +++ b/surface/include/pcl/surface/impl/mls.hpp @@ -47,12 +47,16 @@ #include #include +#ifdef _OPENMP +#include +#endif + ////////////////////////////////////////////////////////////////////////////////////////////// template void pcl::MovingLeastSquares::process (PointCloudOut &output) { // Reset or initialize the collection of indices - corresponding_input_indices_.reset(new PointIndices); + corresponding_input_indices_.reset (new PointIndices); // Check if normals have to be computed/saved if (compute_normals_) @@ -156,8 +160,13 @@ pcl::MovingLeastSquares::computeMLSPointNormal (int index, const std::vector &nn_indices, std::vector &nn_sqr_dists, PointCloudOut &projected_points, - NormalCloud &projected_points_normals) + NormalCloud &projected_points_normals, + PointIndices &corresponding_input_indices, + MLSResult &mls_result) const { + // Note: this method is const because it needs to be thread-safe + // (MovingLeastSquaresOMP calls it from multiple threads) + // Compute the plane coefficients EIGEN_ALIGN16 Eigen::Matrix3d covariance_matrix; Eigen::Vector4d xyz_centroid; @@ -285,7 +294,7 @@ pcl::MovingLeastSquares::computeMLSPointNormal (int index, aux_normal.normal_z = static_cast (normal[2]); aux_normal.curvature = curvature; projected_points_normals.push_back (aux_normal); - corresponding_input_indices_->indices.push_back (index); + corresponding_input_indices.indices.push_back (index); } break; @@ -306,7 +315,7 @@ pcl::MovingLeastSquares::computeMLSPointNormal (int index, projected_point, projected_normal); projected_points.push_back (projected_point); - corresponding_input_indices_->indices.push_back (index); + corresponding_input_indices.indices.push_back (index); if (compute_normals_) projected_points_normals.push_back (projected_normal); } @@ -336,7 +345,7 @@ pcl::MovingLeastSquares::computeMLSPointNormal (int index, aux.y = static_cast (point[1]); aux.z = static_cast (point[2]); projected_points.push_back (aux); - corresponding_input_indices_->indices.push_back (index); + corresponding_input_indices.indices.push_back (index); if (compute_normals_) { @@ -368,7 +377,7 @@ pcl::MovingLeastSquares::computeMLSPointNormal (int index, projected_point, projected_normal); projected_points.push_back (projected_point); - corresponding_input_indices_->indices.push_back (index); + corresponding_input_indices.indices.push_back (index); if (compute_normals_) projected_points_normals.push_back (projected_normal); @@ -383,8 +392,7 @@ pcl::MovingLeastSquares::computeMLSPointNormal (int index, { // Take all point pairs and sample space between them in a grid-fashion // \note consider only point pairs with increasing indices - MLSResult result (point, plane_normal, u_axis, v_axis, c_vec, static_cast (nn_indices.size ()), curvature); - mls_results_[index] = result; + mls_result = MLSResult (point, plane_normal, u_axis, v_axis, c_vec, static_cast (nn_indices.size ()), curvature); break; } } @@ -400,7 +408,7 @@ pcl::MovingLeastSquares::projectPointToMLSSurface (float &u Eigen::VectorXd &c_vec, int num_neighbors, PointOutT &result_point, - pcl::Normal &result_normal) + pcl::Normal &result_normal) const { double n_disp = 0.0f; double d_u = 0.0f, d_v = 0.0f; @@ -447,7 +455,6 @@ pcl::MovingLeastSquares::projectPointToMLSSurface (float &u result_normal.curvature = curvature; } - ////////////////////////////////////////////////////////////////////////////////////////////// template void pcl::MovingLeastSquares::performProcessing (PointCloudOut &output) @@ -477,7 +484,8 @@ pcl::MovingLeastSquares::performProcessing (PointCloudOut & PointCloudOut projected_points; NormalCloud projected_points_normals; // Get a plane approximating the local surface's tangent and project point onto it - computeMLSPointNormal ((*indices_)[cp], nn_indices, nn_sqr_dists, projected_points, projected_points_normals); + int index = (*indices_)[cp]; + computeMLSPointNormal (index, nn_indices, nn_sqr_dists, projected_points, projected_points_normals, *corresponding_input_indices_, mls_results_[index]); // Copy all information from the input cloud to the output points (not doing any interpolation) @@ -491,7 +499,81 @@ pcl::MovingLeastSquares::performProcessing (PointCloudOut & normals_->insert (normals_->end (), projected_points_normals.begin (), projected_points_normals.end ()); } + // Perform the distinct-cloud or voxel-grid upsampling + performUpsampling (output); +} + +////////////////////////////////////////////////////////////////////////////////////////////// +#ifdef _OPENMP +template void +pcl::MovingLeastSquaresOMP::performProcessing (PointCloudOut &output) +{ + // Compute the number of coefficients + nr_coeff_ = (order_ + 1) * (order_ + 2) / 2; + + // (Maximum) number of threads + unsigned int threads = threads_ == 0 ? 1 : threads_; + + // Create temporaries for each thread in order to avoid synchronization + std::vector projected_points (threads); + std::vector projected_points_normals (threads); + std::vector corresponding_input_indices (threads); + + // For all points +#pragma omp parallel for schedule (dynamic,1000) num_threads (threads) + for (size_t cp = 0; cp < indices_->size (); ++cp) + { + // Allocate enough space to hold the results of nearest neighbor searches + // \note resize is irrelevant for a radiusSearch (). + std::vector nn_indices; + std::vector nn_sqr_dists; + + // Get the initial estimates of point positions and their neighborhoods + if (!searchForNeighbors ((*indices_)[cp], nn_indices, nn_sqr_dists)) + continue; + + + // Check the number of nearest neighbors for normal estimation (and later + // for polynomial fit as well) + if (nn_indices.size () < 3) + continue; + + + // This thread's ID (range 0 to threads-1) + int tn = omp_get_thread_num (); + + // Size of projected points before computeMLSPointNormal () adds points + size_t pp_size = projected_points[tn].size (); + + // Get a plane approximating the local surface's tangent and project point onto it + int index = (*indices_)[cp]; + computeMLSPointNormal (index, nn_indices, nn_sqr_dists, projected_points[tn], projected_points_normals[tn], corresponding_input_indices[tn], this->mls_results_[index]); + + // Copy all information from the input cloud to the output points (not doing any interpolation) + for (size_t pp = pp_size; pp < projected_points[tn].size (); ++pp) + copyMissingFields (input_->points[(*indices_)[cp]], projected_points[tn][pp]); + } + + // Combine all threads' results into the output vectors + for (unsigned int tn = 0; tn < threads; ++tn) + { + output.insert (output.end (), projected_points[tn].begin (), projected_points[tn].end ()); + corresponding_input_indices_->indices.insert (corresponding_input_indices_->indices.end (), + corresponding_input_indices[tn].indices.begin (), corresponding_input_indices[tn].indices.end ()); + if (compute_normals_) + normals_->insert (normals_->end (), projected_points_normals[tn].begin (), projected_points_normals[tn].end ()); + } + + // Perform the distinct-cloud or voxel-grid upsampling + performUpsampling (output); +} +#endif + +////////////////////////////////////////////////////////////////////////////////////////////// +template void +pcl::MovingLeastSquares::performUpsampling (PointCloudOut &output) +{ if (upsample_method_ == DISTINCT_CLOUD) { for (size_t dp_i = 0; dp_i < distinct_cloud_->size (); ++dp_i) // dp_i = distinct_point_i @@ -588,7 +670,7 @@ pcl::MovingLeastSquares::performProcessing (PointCloudOut & copyMissingFields (input_->points[input_index], result_point); // Store the id of the original point - corresponding_input_indices_->indices.push_back(input_index); + corresponding_input_indices_->indices.push_back (input_index); output.push_back (result_point); @@ -687,4 +769,8 @@ pcl::MovingLeastSquares::copyMissingFields (const PointInT #define PCL_INSTANTIATE_MovingLeastSquares(T,OutT) template class PCL_EXPORTS pcl::MovingLeastSquares; +#ifdef _OPENMP +#define PCL_INSTANTIATE_MovingLeastSquaresOMP(T,OutT) template class PCL_EXPORTS pcl::MovingLeastSquaresOMP; +#endif + #endif // PCL_SURFACE_IMPL_MLS_H_ diff --git a/surface/include/pcl/surface/mls.h b/surface/include/pcl/surface/mls.h index 013f19401f1..9de582c2ced 100644 --- a/surface/include/pcl/surface/mls.h +++ b/surface/include/pcl/surface/mls.h @@ -342,7 +342,7 @@ namespace pcl /** \brief Data structure used to store the results of the MLS fitting - * \note Used only in the case of VOXEL_GRID_DILATION upsampling + * \note Used only in the case of VOXEL_GRID_DILATION or DISTINCT_CLOUD upsampling */ struct MLSResult { @@ -364,7 +364,7 @@ namespace pcl }; /** \brief Stores the MLS result for each point in the input cloud - * \note Used only in the case of VOXEL_GRID_DILATION upsampling + * \note Used only in the case of VOXEL_GRID_DILATION or DISTINCT_CLOUD upsampling */ std::vector mls_results_; @@ -434,13 +434,16 @@ namespace pcl /** \brief Number of coefficients, to be computed from the requested order.*/ int nr_coeff_; + /** \brief Collects for each point in output the corrseponding point in the input. */ + PointIndicesPtr corresponding_input_indices_; + /** \brief Search for the closest nearest neighbors of a given point using a radius search * \param[in] index the index of the query point * \param[out] indices the resultant vector of indices representing the k-nearest neighbors * \param[out] sqr_distances the resultant squared distances from the query point to the k-nearest neighbors */ inline int - searchForNeighbors (int index, std::vector &indices, std::vector &sqr_distances) + searchForNeighbors (int index, std::vector &indices, std::vector &sqr_distances) const { return (search_method_ (index, search_radius_, indices, sqr_distances)); } @@ -453,13 +456,18 @@ namespace pcl * (in the case of upsampling method NONE, only the query point projected to its own fitted surface will be returned, * in the case of the other upsampling methods, multiple points will be returned) * \param[out] projected_points_normals the normals corresponding to the projected points + * \param[out] corresponding_input_indices the set of indices with each point in output having the corresponding point in input + * \param[out] mls_result stores the MLS result for each point in the input cloud + * (used only in the case of VOXEL_GRID_DILATION or DISTINCT_CLOUD upsampling) */ void computeMLSPointNormal (int index, const std::vector &nn_indices, std::vector &nn_sqr_dists, PointCloudOut &projected_points, - NormalCloud &projected_points_normals); + NormalCloud &projected_points_normals, + PointIndices &corresponding_input_indices, + MLSResult &mls_result) const; /** \brief Fits a point (sample point) given in the local plane coordinates of an input point (query point) to * the MLS surface of the input point @@ -484,21 +492,23 @@ namespace pcl Eigen::VectorXd &c_vec, int num_neighbors, PointOutT &result_point, - pcl::Normal &result_normal); + pcl::Normal &result_normal) const; void copyMissingFields (const PointInT &point_in, PointOutT &point_out) const; - private: /** \brief Abstract surface reconstruction method. * \param[out] output the result of the reconstruction */ virtual void performProcessing (PointCloudOut &output); - /** \brief Collects for each point in output the corrseponding point in the input. */ - PointIndicesPtr corresponding_input_indices_; + /** \brief Perform upsampling for the distinct-cloud and voxel-grid methods + * \param[out] output the result of the reconstruction + */ + void performUpsampling (PointCloudOut &output); + private: /** \brief Boost-based random number generator algorithm. */ boost::mt19937 rng_alg_; @@ -515,6 +525,63 @@ namespace pcl public: EIGEN_MAKE_ALIGNED_OPERATOR_NEW }; + +#ifdef _OPENMP + /** \brief MovingLeastSquaresOMP is a parallelized version of MovingLeastSquares, using the OpenMP standard. + * \note Compared to MovingLeastSquares, an overhead is incurred in terms of runtime and memory usage. + * \note The upsampling methods DISTINCT_CLOUD and VOXEL_GRID_DILATION are not parallelized completely, i.e. parts of the algorithm run on a single thread only. + * \author Robert Huitl + * \ingroup surface + */ + template + class MovingLeastSquaresOMP: public MovingLeastSquares + { + public: + typedef boost::shared_ptr > Ptr; + typedef boost::shared_ptr > ConstPtr; + + using PCLBase::input_; + using PCLBase::indices_; + using MovingLeastSquares::normals_; + using MovingLeastSquares::corresponding_input_indices_; + using MovingLeastSquares::nr_coeff_; + using MovingLeastSquares::order_; + using MovingLeastSquares::compute_normals_; + + typedef pcl::PointCloud NormalCloud; + typedef pcl::PointCloud::Ptr NormalCloudPtr; + + typedef pcl::PointCloud PointCloudOut; + typedef typename PointCloudOut::Ptr PointCloudOutPtr; + typedef typename PointCloudOut::ConstPtr PointCloudOutConstPtr; + + /** \brief Constructor for parallelized Moving Least Squares + * \param threads the maximum number of hardware threads to use (0 sets the value to 1) + */ + MovingLeastSquaresOMP (unsigned int threads = 0) : threads_ (threads) + { + + } + + /** \brief Set the maximum number of threads to use + * \param threads the maximum number of hardware threads to use (0 sets the value to 1) + */ + inline void + setNumberOfThreads (unsigned int threads = 0) + { + threads_ = threads; + } + + protected: + /** \brief Abstract surface reconstruction method. + * \param[out] output the result of the reconstruction + */ + virtual void performProcessing (PointCloudOut &output); + + /** \brief The maximum number of threads the scheduler should use. */ + unsigned int threads_; + }; +#endif } #ifdef PCL_NO_PRECOMPILE diff --git a/surface/src/mls.cpp b/surface/src/mls.cpp index 4f7aab993b2..65c5340c91b 100644 --- a/surface/src/mls.cpp +++ b/surface/src/mls.cpp @@ -46,5 +46,8 @@ PCL_INSTANTIATE_PRODUCT(MovingLeastSquares, ((pcl::PointXYZ)(pcl::PointXYZRGB)(pcl::PointXYZRGBA)) ((pcl::PointXYZ)(pcl::PointXYZRGB)(pcl::PointXYZRGBA)(pcl::PointXYZRGBNormal)(pcl::PointNormal))) +PCL_INSTANTIATE_PRODUCT(MovingLeastSquaresOMP, ((pcl::PointXYZ)(pcl::PointXYZRGB)(pcl::PointXYZRGBA)) + ((pcl::PointXYZ)(pcl::PointXYZRGB)(pcl::PointXYZRGBA)(pcl::PointXYZRGBNormal)(pcl::PointNormal))) + /// Ideally, we should instantiate like below, but it takes large amounts of main memory for compilation //PCL_INSTANTIATE_PRODUCT(MovingLeastSquares, (PCL_XYZ_POINT_TYPES)(PCL_XYZ_POINT_TYPES)) diff --git a/test/surface/test_moving_least_squares.cpp b/test/surface/test_moving_least_squares.cpp index 2a887ef80df..71d7e8e0a83 100644 --- a/test/surface/test_moving_least_squares.cpp +++ b/test/surface/test_moving_least_squares.cpp @@ -89,13 +89,13 @@ TEST (PCL, MovingLeastSquares) // Testing OpenMP version - MovingLeastSquares mls_omp; + MovingLeastSquaresOMP mls_omp; mls_omp.setInputCloud (cloud); mls_omp.setComputeNormals (true); mls_omp.setPolynomialFit (true); mls_omp.setSearchMethod (tree); mls_omp.setSearchRadius (0.03); - //mls_omp.setNumberOfThreads (4); + mls_omp.setNumberOfThreads (4); // Reconstruct mls_normals->clear ();