diff --git a/packages/gpu_voxels/CMakeLists.txt b/packages/gpu_voxels/CMakeLists.txt index 22e5f66..a2cff10 100644 --- a/packages/gpu_voxels/CMakeLists.txt +++ b/packages/gpu_voxels/CMakeLists.txt @@ -153,7 +153,7 @@ SET(ICMAKER_CUDA_CODE compute_${ICMAKER_CUDA_COMPUTE_VERSION}) SET(ICMAKER_CUDA_PTXAS_VERBOSE "") # "--resource-usage") #nvcc outputs register and memory usage data for each kernel SET(ICMAKER_CUDA_WALL "-Xcompiler=-Wall") -SET(ICMAKER_CUDA_MAXREGS "--maxrregcount=63") # set to 31 to compile for JetsonTX1; N*blocksize must be smaller than max registers per block +SET(ICMAKER_CUDA_MAXREGS "--maxrregcount=63") # set to 31 to compile on JetsonTX1/TX2/Nano (compute capability 5.3/6.2); N*blocksize must be smaller than max registers per block SET(ICMAKER_CUDA_DEBUG "-lineinfo") # "-lineinfo") #"-g -G") # -G disables optimization. use -lineinfo for profiling # Enable position independent code diff --git a/packages/gpu_voxels/src/examples/DistanceROSDemo.cpp b/packages/gpu_voxels/src/examples/DistanceROSDemo.cpp index 138c6d6..4cc56ea 100644 --- a/packages/gpu_voxels/src/examples/DistanceROSDemo.cpp +++ b/packages/gpu_voxels/src/examples/DistanceROSDemo.cpp @@ -31,11 +31,11 @@ * Start the demo and then the visualizer. * Example parameters: ./build/bin/distance_ros_demo -e 0.3 -f 1 -s 0.008 #voxel_size 8mm, filter_threshold 1, erode less than 30% occupied neighborhoods * - * To see a small "distance hull": + * To see a small "distance hull": * Right-click the visualizer and select "Render Mode > Distance Maps Rendermode > Multicolor gradient" * Press "s" to disable all "distance hulls" * Press "Alt-1" an then "1" to enable drawing of SweptVolumeID 11, corresponding to a distance of "1" - * + * * To see a large "distance hull": * Press "ALT-t" two times, then "s" two times. * You will see the Kinect pointcloud inflated by 10 Voxels. @@ -70,12 +70,12 @@ Matrix4f tf; void ctrlchandler(int) { - exit(EXIT_SUCCESS); + exit(EXIT_SUCCESS); } void killhandler(int) { - exit(EXIT_SUCCESS); + exit(EXIT_SUCCESS); } /** @@ -85,187 +85,186 @@ void killhandler(int) */ void callback(const pcl::PointCloud::ConstPtr& msg) { - std::vector point_data; - point_data.resize(msg->points.size()); + std::vector point_data; + point_data.resize(msg->points.size()); + + for (uint32_t i = 0; i < msg->points.size(); i++) + { + point_data[i].x = msg->points[i].x; + point_data[i].y = msg->points[i].y; + point_data[i].z = msg->points[i].z; + } - for (uint32_t i = 0; i < msg->points.size(); i++) - { - point_data[i].x = msg->points[i].x; - point_data[i].y = msg->points[i].y; - point_data[i].z = msg->points[i].z; - } + //my_point_cloud.add(point_data); + my_point_cloud.update(point_data); - //my_point_cloud.add(point_data); - my_point_cloud.update(point_data); + // transform new pointcloud to world coordinates + my_point_cloud.transformSelf(&tf); - // transform new pointcloud to world coordinates - my_point_cloud.transformSelf(&tf); - - new_data_received = true; + new_data_received = true; - LOGGING_INFO(Gpu_voxels, "DistanceROSDemo camera callback. PointCloud size: " << msg->points.size() << endl); + LOGGING_INFO(Gpu_voxels, "DistanceROSDemo camera callback. PointCloud size: " << msg->points.size() << endl); } int main(int argc, char* argv[]) { - signal(SIGINT, ctrlchandler); - signal(SIGTERM, killhandler); - - icl_core::config::GetoptParameter points_parameter("points-topic:", "t", - "Identifer of the pointcloud topic"); - icl_core::config::GetoptParameter roll_parameter ("roll:", "r", - "Camera roll in degrees"); - icl_core::config::GetoptParameter pitch_parameter ("pitch:", "p", - "Camera pitch in degrees"); - icl_core::config::GetoptParameter yaw_parameter ("yaw:", "y", - "Camera yaw in degrees"); - icl_core::config::GetoptParameter voxel_side_length_parameter("voxel_side_length:", "s", - "Side length of a voxel, default 0.01"); - icl_core::config::GetoptParameter filter_threshold_parameter ("filter_threshold:", "f", - "Density filter threshold per voxel, default 1"); - icl_core::config::GetoptParameter erode_threshold_parameter ("erode_threshold:", "e", - "Erode voxels with fewer occupied neighbors (percentage)"); - icl_core::config::addParameter(points_parameter); - icl_core::config::addParameter(roll_parameter); - icl_core::config::addParameter(pitch_parameter); - icl_core::config::addParameter(yaw_parameter); - icl_core::config::addParameter(voxel_side_length_parameter); - icl_core::config::addParameter(filter_threshold_parameter); - icl_core::config::addParameter(erode_threshold_parameter); - icl_core::logging::initialize(argc, argv); - - voxel_side_length = icl_core::config::paramOptDefault("voxel_side_length", 0.01f); - - // setup "tf" to transform from camera to world / gpu-voxels coordinates - - //const Vector3f camera_offsets(2, 0, 1); // camera located at y=0, x_max/2, z_max/2 - const Vector3f camera_offsets(map_dimensions.x * voxel_side_length * 0.5f, -0.2f, map_dimensions.z * voxel_side_length * 0.5f); // camera located at y=-0.2m, x_max/2, z_max/2 - - float roll = icl_core::config::paramOptDefault("roll", 0.0f) * 3.141592f / 180.0f; - float pitch = icl_core::config::paramOptDefault("pitch", 0.0f) * 3.141592f / 180.0f; - float yaw = icl_core::config::paramOptDefault("yaw", 0.0f) * 3.141592f / 180.0f; - tf = Matrix4f::createFromRotationAndTranslation(Matrix3f::createFromRPY(-3.14/2.0 + roll, 0 + pitch, 0 + yaw), camera_offsets); - - std::string point_cloud_topic = icl_core::config::paramOptDefault("points-topic", "/camera/depth/points"); - LOGGING_INFO(Gpu_voxels, "DistanceROSDemo start. Point-cloud topic: " << point_cloud_topic << endl); - - // Generate a GPU-Voxels instance: - gvl = gpu_voxels::GpuVoxels::getInstance(); - gvl->initialize(map_dimensions.x, map_dimensions.y, map_dimensions.z, voxel_side_length); - - ros::init(argc, argv, "distance_ros_demo"); - ros::NodeHandle nh; - ros::Subscriber sub = nh.subscribe >(point_cloud_topic, 1, callback); - - //Vis Helper - gvl->addPrimitives(primitive_array::ePRIM_SPHERE, "measurementPoints"); - - //PBA - gvl->addMap(MT_DISTANCE_VOXELMAP, "pbaDistanceVoxmap"); - shared_ptr pbaDistanceVoxmap = dynamic_pointer_cast(gvl->getMap("pbaDistanceVoxmap")); - - gvl->addMap(MT_PROBAB_VOXELMAP, "erodeTempVoxmap1"); - shared_ptr erodeTempVoxmap1 = dynamic_pointer_cast(gvl->getMap("erodeTempVoxmap1")); - gvl->addMap(MT_PROBAB_VOXELMAP, "erodeTempVoxmap2"); - shared_ptr erodeTempVoxmap2 = dynamic_pointer_cast(gvl->getMap("erodeTempVoxmap2")); - - gvl->addMap(MT_COUNTING_VOXELLIST, "countingVoxelList"); - shared_ptr countingVoxelList = dynamic_pointer_cast(gvl->getMap("countingVoxelList")); - - gvl->addMap(MT_COUNTING_VOXELLIST, "countingVoxelListFiltered"); - shared_ptr countingVoxelListFiltered = dynamic_pointer_cast(gvl->getMap("countingVoxelListFiltered")); - - //PBA map clone for visualization without artifacts - gvl->addMap(MT_DISTANCE_VOXELMAP, "pbaDistanceVoxmapVisual"); - shared_ptr pbaDistanceVoxmapVisual = dynamic_pointer_cast(gvl->getMap("pbaDistanceVoxmapVisual")); - pbaDistanceVoxmapVisual->clearMap(); - - // Define two measurement points: - std::vector measurement_points; - measurement_points.push_back(Vector3i(40, 100, 50)); - measurement_points.push_back(Vector3i(160, 100, 50)); - gvl->modifyPrimitives("measurementPoints", measurement_points, 5); - - int filter_threshold = icl_core::config::paramOptDefault("filter_threshold", 0); - std::cout << "Remove voxels containing less points than: " << filter_threshold << std::endl; - - float erode_threshold = icl_core::config::paramOptDefault("erode_threshold", 0.0f); - std::cout << "Erode voxels with neighborhood occupancy ratio less or equal to: " << erode_threshold << std::endl; - - ros::Rate r(30); - size_t iteration = 0; - new_data_received = true; // call visualize on the first iteration - - LOGGING_INFO(Gpu_voxels, "start visualizing maps" << endl); - while (ros::ok()) - { - ros::spinOnce(); - - LOGGING_DEBUG(Gpu_voxels, "START iteration" << endl); - - // visualize new pointcloud if there is new data - if (new_data_received) - { - new_data_received = false; - iteration++; - - pbaDistanceVoxmap->clearMap(); - countingVoxelList->clearMap(); - countingVoxelListFiltered->clearMap(); - erodeTempVoxmap1->clearMap(); - erodeTempVoxmap2->clearMap(); - - // Insert the CAMERA data (now in world coordinates) into the list - countingVoxelList->insertPointCloud(my_point_cloud, eBVM_OCCUPIED); - gvl->visualizeMap("countingVoxelList"); - - countingVoxelListFiltered->merge(countingVoxelList); - countingVoxelListFiltered->remove_underpopulated(filter_threshold); - gvl->visualizeMap("countingVoxelListFiltered"); - - LOGGING_INFO(Gpu_voxels, "erode voxels into pbaDistanceVoxmap" << endl); - erodeTempVoxmap1->merge(countingVoxelListFiltered); - if (erode_threshold > 0) - { - erodeTempVoxmap1->erodeInto(*erodeTempVoxmap2, erode_threshold); - } else - { - erodeTempVoxmap1->erodeLonelyInto(*erodeTempVoxmap2); //erode only "lonely voxels" without occupied neighbors - } - pbaDistanceVoxmap->mergeOccupied(erodeTempVoxmap2); - - // Calculate the distance map: - LOGGING_INFO(Gpu_voxels, "calculate distance map for " << countingVoxelList->getDimensions().x << " occupied voxels" << endl); - pbaDistanceVoxmap->parallelBanding3D(); - - LOGGING_INFO(Gpu_voxels, "start cloning pbaDistanceVoxmap" << endl); - pbaDistanceVoxmapVisual->clone(*(pbaDistanceVoxmap.get())); - LOGGING_INFO(Gpu_voxels, "done cloning pbaDistanceVoxmap" << endl); - - gvl->visualizeMap("pbaDistanceVoxmapVisual"); - gvl->visualizePrimitivesArray("measurementPoints"); - - // For the measurement points we query the clearance to the closest obstacle: - thrust::device_ptr dvm_thrust_ptr(pbaDistanceVoxmap->getDeviceDataPtr()); - for(size_t i = 0; i < measurement_points.size(); i++) - { - int id = voxelmap::getVoxelIndexSigned(map_dimensions, measurement_points[i]); - - //get DistanceVoxel with closest obstacle information - // DistanceVoxel dv = dvm_thrust_ptr[id]; // worked before Cuda9 - DistanceVoxel dv; //get DistanceVoxel with closest obstacle information - cudaMemcpy(&dv, (dvm_thrust_ptr+id).get(), sizeof(DistanceVoxel), cudaMemcpyDeviceToHost); - - float metric_free_space = sqrtf(dv.squaredObstacleDistance(measurement_points[i])) * voxel_side_length; - LOGGING_INFO(Gpu_voxels, "Obstacle @ " << dv.getObstacle() << " Voxel @ " << measurement_points[i] << " has a clearance of " << metric_free_space << "m." << endl); - } - } - - r.sleep(); - } - - LOGGING_INFO(Gpu_voxels, "shutting down" << endl); - - exit(EXIT_SUCCESS); -} + signal(SIGINT, ctrlchandler); + signal(SIGTERM, killhandler); + + icl_core::config::GetoptParameter points_parameter("points-topic:", "t", + "Identifer of the pointcloud topic"); + icl_core::config::GetoptParameter roll_parameter ("roll:", "r", + "Camera roll in degrees"); + icl_core::config::GetoptParameter pitch_parameter ("pitch:", "p", + "Camera pitch in degrees"); + icl_core::config::GetoptParameter yaw_parameter ("yaw:", "y", + "Camera yaw in degrees"); + icl_core::config::GetoptParameter voxel_side_length_parameter("voxel_side_length:", "s", + "Side length of a voxel, default 0.01"); + icl_core::config::GetoptParameter filter_threshold_parameter ("filter_threshold:", "f", + "Density filter threshold per voxel, default 1"); + icl_core::config::GetoptParameter erode_threshold_parameter ("erode_threshold:", "e", + "Erode voxels with fewer occupied neighbors (percentage)"); + icl_core::config::addParameter(points_parameter); + icl_core::config::addParameter(roll_parameter); + icl_core::config::addParameter(pitch_parameter); + icl_core::config::addParameter(yaw_parameter); + icl_core::config::addParameter(voxel_side_length_parameter); + icl_core::config::addParameter(filter_threshold_parameter); + icl_core::config::addParameter(erode_threshold_parameter); + icl_core::logging::initialize(argc, argv); + + voxel_side_length = icl_core::config::paramOptDefault("voxel_side_length", 0.01f); + + // setup "tf" to transform from camera to world / gpu-voxels coordinates + + //const Vector3f camera_offsets(2, 0, 1); // camera located at y=0, x_max/2, z_max/2 + const Vector3f camera_offsets(map_dimensions.x * voxel_side_length * 0.5f, -0.2f, map_dimensions.z * voxel_side_length * 0.5f); // camera located at y=-0.2m, x_max/2, z_max/2 + + float roll = icl_core::config::paramOptDefault("roll", 0.0f) * 3.141592f / 180.0f; + float pitch = icl_core::config::paramOptDefault("pitch", 0.0f) * 3.141592f / 180.0f; + float yaw = icl_core::config::paramOptDefault("yaw", 0.0f) * 3.141592f / 180.0f; + tf = Matrix4f::createFromRotationAndTranslation(Matrix3f::createFromRPY(-3.14/2.0 + roll, 0 + pitch, 0 + yaw), camera_offsets); + + std::string point_cloud_topic = icl_core::config::paramOptDefault("points-topic", "/camera/depth/points"); + LOGGING_INFO(Gpu_voxels, "DistanceROSDemo start. Point-cloud topic: " << point_cloud_topic << endl); + + // Generate a GPU-Voxels instance: + gvl = gpu_voxels::GpuVoxels::getInstance(); + gvl->initialize(map_dimensions.x, map_dimensions.y, map_dimensions.z, voxel_side_length); + + ros::init(argc, argv, "distance_ros_demo"); + ros::NodeHandle nh; + ros::Subscriber sub = nh.subscribe >(point_cloud_topic, 1, callback); + + //Vis Helper + gvl->addPrimitives(primitive_array::ePRIM_SPHERE, "measurementPoints"); + + //PBA + gvl->addMap(MT_DISTANCE_VOXELMAP, "pbaDistanceVoxmap"); + shared_ptr pbaDistanceVoxmap = dynamic_pointer_cast(gvl->getMap("pbaDistanceVoxmap")); + + gvl->addMap(MT_PROBAB_VOXELMAP, "erodeTempVoxmap1"); + shared_ptr erodeTempVoxmap1 = dynamic_pointer_cast(gvl->getMap("erodeTempVoxmap1")); + gvl->addMap(MT_PROBAB_VOXELMAP, "erodeTempVoxmap2"); + shared_ptr erodeTempVoxmap2 = dynamic_pointer_cast(gvl->getMap("erodeTempVoxmap2")); + + gvl->addMap(MT_COUNTING_VOXELLIST, "countingVoxelList"); + shared_ptr countingVoxelList = dynamic_pointer_cast(gvl->getMap("countingVoxelList")); + + gvl->addMap(MT_COUNTING_VOXELLIST, "countingVoxelListFiltered"); + shared_ptr countingVoxelListFiltered = dynamic_pointer_cast(gvl->getMap("countingVoxelListFiltered")); + + //PBA map clone for visualization without artifacts + gvl->addMap(MT_DISTANCE_VOXELMAP, "pbaDistanceVoxmapVisual"); + shared_ptr pbaDistanceVoxmapVisual = dynamic_pointer_cast(gvl->getMap("pbaDistanceVoxmapVisual")); + pbaDistanceVoxmapVisual->clearMap(); + + // Define two measurement points: + std::vector measurement_points; + measurement_points.push_back(Vector3i(40, 100, 50)); + measurement_points.push_back(Vector3i(160, 100, 50)); + gvl->modifyPrimitives("measurementPoints", measurement_points, 5); + + int filter_threshold = icl_core::config::paramOptDefault("filter_threshold", 0); + std::cout << "Remove voxels containing less points than: " << filter_threshold << std::endl; + + float erode_threshold = icl_core::config::paramOptDefault("erode_threshold", 0.0f); + std::cout << "Erode voxels with neighborhood occupancy ratio less or equal to: " << erode_threshold << std::endl; + + ros::Rate r(30); + size_t iteration = 0; + new_data_received = true; // call visualize on the first iteration + + LOGGING_INFO(Gpu_voxels, "start visualizing maps" << endl); + while (ros::ok()) + { + ros::spinOnce(); + + LOGGING_DEBUG(Gpu_voxels, "START iteration" << endl); + + // visualize new pointcloud if there is new data + if (new_data_received) + { + new_data_received = false; + iteration++; + + pbaDistanceVoxmap->clearMap(); + countingVoxelList->clearMap(); + countingVoxelListFiltered->clearMap(); + erodeTempVoxmap1->clearMap(); + erodeTempVoxmap2->clearMap(); + + // Insert the CAMERA data (now in world coordinates) into the list + countingVoxelList->insertPointCloud(my_point_cloud, eBVM_OCCUPIED); + gvl->visualizeMap("countingVoxelList"); + + countingVoxelListFiltered->merge(countingVoxelList); + countingVoxelListFiltered->remove_underpopulated(filter_threshold); + gvl->visualizeMap("countingVoxelListFiltered"); + + LOGGING_INFO(Gpu_voxels, "erode voxels into pbaDistanceVoxmap" << endl); + erodeTempVoxmap1->merge(countingVoxelListFiltered); + if (erode_threshold > 0) + { + erodeTempVoxmap1->erodeInto(*erodeTempVoxmap2, erode_threshold); + } else + { + erodeTempVoxmap1->erodeLonelyInto(*erodeTempVoxmap2); //erode only "lonely voxels" without occupied neighbors + } + pbaDistanceVoxmap->mergeOccupied(erodeTempVoxmap2); + // Calculate the distance map: + LOGGING_INFO(Gpu_voxels, "calculate distance map for " << countingVoxelList->getDimensions().x << " occupied voxels" << endl); + pbaDistanceVoxmap->parallelBanding3D(); + + LOGGING_INFO(Gpu_voxels, "start cloning pbaDistanceVoxmap" << endl); + pbaDistanceVoxmapVisual->clone(*(pbaDistanceVoxmap.get())); + LOGGING_INFO(Gpu_voxels, "done cloning pbaDistanceVoxmap" << endl); + + gvl->visualizeMap("pbaDistanceVoxmapVisual"); + gvl->visualizePrimitivesArray("measurementPoints"); + + // For the measurement points we query the clearance to the closest obstacle: + thrust::device_ptr dvm_thrust_ptr(pbaDistanceVoxmap->getDeviceDataPtr()); + for(size_t i = 0; i < measurement_points.size(); i++) + { + int id = voxelmap::getVoxelIndexSigned(map_dimensions, measurement_points[i]); + + //get DistanceVoxel with closest obstacle information + // DistanceVoxel dv = dvm_thrust_ptr[id]; // worked before Cuda9 + DistanceVoxel dv; //get DistanceVoxel with closest obstacle information + cudaMemcpy(&dv, (dvm_thrust_ptr+id).get(), sizeof(DistanceVoxel), cudaMemcpyDeviceToHost); + + float metric_free_space = sqrtf(dv.squaredObstacleDistance(measurement_points[i])) * voxel_side_length; + LOGGING_INFO(Gpu_voxels, "Obstacle @ " << dv.getObstacle() << " Voxel @ " << measurement_points[i] << " has a clearance of " << metric_free_space << "m." << endl); + } + } + + r.sleep(); + } + + LOGGING_INFO(Gpu_voxels, "shutting down" << endl); + + exit(EXIT_SUCCESS); +} diff --git a/packages/gpu_voxels/src/gpu_voxels/octree/NTree.hpp b/packages/gpu_voxels/src/gpu_voxels/octree/NTree.hpp index fc2aae2..4ed2a52 100644 --- a/packages/gpu_voxels/src/gpu_voxels/octree/NTree.hpp +++ b/packages/gpu_voxels/src/gpu_voxels/octree/NTree.hpp @@ -65,10 +65,11 @@ namespace cub = thrust::system::cuda::detail::cub_; #else // Cuda 9 or higher #define THRUST_CUB_NS_PREFIX namespace thrust { namespace cuda_cub { #define THRUST_CUB_NS_POSTFIX } } -#include -#undef CUB_NS_PREFIX -#undef CUB_NS_POSTFIX -namespace cub = thrust::cuda_cub::cub; +//#include +#include +//#undef CUB_NS_PREFIX +//#undef CUB_NS_POSTFIX +//namespace cub = thrust::cuda_cub::cub; #endif // Internal dependencies diff --git a/packages/gpu_voxels/src/gpu_voxels/octree/NTreeData.h b/packages/gpu_voxels/src/gpu_voxels/octree/NTreeData.h index c07c135..e6761c9 100644 --- a/packages/gpu_voxels/src/gpu_voxels/octree/NTreeData.h +++ b/packages/gpu_voxels/src/gpu_voxels/octree/NTreeData.h @@ -44,10 +44,11 @@ namespace cub = thrust::system::cuda::detail::cub_; #else // Cuda 9 or higher #define THRUST_CUB_NS_PREFIX namespace thrust { namespace cuda_cub { #define THRUST_CUB_NS_POSTFIX } } -#include -#undef CUB_NS_PREFIX -#undef CUB_NS_POSTFIX -namespace cub = thrust::cuda_cub::cub; +//#include +#include +//#undef CUB_NS_PREFIX +//#undef CUB_NS_POSTFIX +//namespace cub = thrust::cuda_cub::cub; #endif namespace gpu_voxels { diff --git a/packages/gpu_voxels/src/gpu_voxels/octree/PointCloud.cu b/packages/gpu_voxels/src/gpu_voxels/octree/PointCloud.cu index 5b3c009..2de9496 100644 --- a/packages/gpu_voxels/src/gpu_voxels/octree/PointCloud.cu +++ b/packages/gpu_voxels/src/gpu_voxels/octree/PointCloud.cu @@ -19,12 +19,14 @@ namespace cub = thrust::system::cuda::detail::cub_; #else // Cuda 9 or higher #define THRUST_CUB_NS_PREFIX namespace thrust { namespace cuda_cub { #define THRUST_CUB_NS_POSTFIX } } -#include -#undef CUB_NS_PREFIX -#undef CUB_NS_POSTFIX -namespace cub = thrust::cuda_cub::cub; +//#include +#include +//#undef CUB_NS_PREFIX +//#undef CUB_NS_POSTFIX +//namespace cub = thrust::cuda_cub::cub; #endif + #include #include #include diff --git a/packages/gpu_voxels/src/gpu_voxels/octree/kernels/kernel_common.h b/packages/gpu_voxels/src/gpu_voxels/octree/kernels/kernel_common.h index 4e9ff6e..8641618 100644 --- a/packages/gpu_voxels/src/gpu_voxels/octree/kernels/kernel_common.h +++ b/packages/gpu_voxels/src/gpu_voxels/octree/kernels/kernel_common.h @@ -43,10 +43,11 @@ namespace cub = thrust::system::cuda::detail::cub_; #else // Cuda 9 or higher #define THRUST_CUB_NS_PREFIX namespace thrust { namespace cuda_cub { #define THRUST_CUB_NS_POSTFIX } } -#include -#undef CUB_NS_PREFIX -#undef CUB_NS_POSTFIX -namespace cub = thrust::cuda_cub::cub; +//#include +#include +//#undef CUB_NS_PREFIX +//#undef CUB_NS_POSTFIX +//namespace cub = thrust::cuda_cub::cub; #endif // __ballot has been replaced by __ballot_sync in Cuda9