From 3820d2eb900dbe51a3cce53e4ff0fb98ed30ecac Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 11 Dec 2023 15:44:07 -0600 Subject: [PATCH] Add changes needed for CCCL 2.2.0 support. --- cpp/CMakeLists.txt | 3 ++ cpp/cmake/thirdparty/get_rmm.cmake | 23 ++++++++++++ cpp/cmake/thirdparty/get_thrust.cmake | 23 ++++++++++++ cpp/src/kmeans/kmeans_mg_impl.cuh | 54 +++++++++++++++------------ 4 files changed, 79 insertions(+), 24 deletions(-) create mode 100644 cpp/cmake/thirdparty/get_rmm.cmake create mode 100644 cpp/cmake/thirdparty/get_thrust.cmake diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e83f31e3a4..c2b5437abf 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -220,7 +220,10 @@ if(BUILD_CUML_TESTS OR BUILD_PRIMS_TESTS) find_package(Threads) endif() +# thrust before rmm, rmm before raft so we get the right version of thrust/rmm +include(cmake/thirdparty/get_thrust.cmake) include(cmake/thirdparty/get_libcudacxx.cmake) +include(cmake/thirdparty/get_rmm.cmake) include(cmake/thirdparty/get_raft.cmake) if(LINK_TREELITE) diff --git a/cpp/cmake/thirdparty/get_rmm.cmake b/cpp/cmake/thirdparty/get_rmm.cmake new file mode 100644 index 0000000000..35968f7245 --- /dev/null +++ b/cpp/cmake/thirdparty/get_rmm.cmake @@ -0,0 +1,23 @@ +#============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_rmm) + include(${rapids-cmake-dir}/cpm/rmm.cmake) + rapids_cpm_rmm(BUILD_EXPORT_SET cuml-exports + INSTALL_EXPORT_SET cuml-exports) +endfunction() + +find_and_configure_rmm() diff --git a/cpp/cmake/thirdparty/get_thrust.cmake b/cpp/cmake/thirdparty/get_thrust.cmake new file mode 100644 index 0000000000..1477a8b397 --- /dev/null +++ b/cpp/cmake/thirdparty/get_thrust.cmake @@ -0,0 +1,23 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Use CPM to find or clone thrust +function(find_and_configure_thrust) + include(${rapids-cmake-dir}/cpm/thrust.cmake) + rapids_cpm_thrust(NAMESPACE cuml + BUILD_EXPORT_SET cuml-exports + INSTALL_EXPORT_SET cuml-exports) +endfunction() + +find_and_configure_thrust() diff --git a/cpp/src/kmeans/kmeans_mg_impl.cuh b/cpp/src/kmeans/kmeans_mg_impl.cuh index f1a0470652..c53c346df8 100644 --- a/cpp/src/kmeans/kmeans_mg_impl.cuh +++ b/cpp/src/kmeans/kmeans_mg_impl.cuh @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -241,7 +242,8 @@ void initKMeansPlusPlus(const raft::handle_t& handle, minClusterDistance.view(), workspace, clusterCost.view(), - [] __device__(const DataT& a, const DataT& b) { return a + b; }); + cuda::proclaim_return_type( + [] __device__(const DataT& a, const DataT& b) { return a + b; })); // compute total cluster cost by accumulating the partial cost from all the // ranks @@ -291,7 +293,8 @@ void initKMeansPlusPlus(const raft::handle_t& handle, minClusterDistance.view(), workspace, clusterCost.view(), - [] __device__(const DataT& a, const DataT& b) { return a + b; }); + cuda::proclaim_return_type( + [] __device__(const DataT& a, const DataT& b) { return a + b; })); comm.allreduce( clusterCost.data_handle(), clusterCost.data_handle(), 1, raft::comms::op_t::SUM, stream); raft::copy(&psi, clusterCost.data_handle(), 1, stream); @@ -481,7 +484,7 @@ void checkWeights(const raft::handle_t& handle, weight.data_handle(), weight.data_handle(), weight.size(), - [=] __device__(const DataT& wt) { return wt * scale; }, + cuda::proclaim_return_type([=] __device__(const DataT& wt) { return wt * scale; }), stream); } } @@ -621,12 +624,12 @@ void fit(const raft::handle_t& handle, newCentroids.extent(0), true, false, - [=] __device__(DataT mat, DataT vec) { + cuda::proclaim_return_type([=] __device__(DataT mat, DataT vec) { if (vec == 0) return DataT(0); else return mat / vec; - }, + }), stream); // copy the centroids[i] to newCentroids[i] when wtInCluster[i] is 0 @@ -639,16 +642,18 @@ void fit(const raft::handle_t& handle, itr_wt, wtInCluster.extent(0), newCentroids.data_handle(), - [=] __device__(raft::KeyValuePair map) { // predicate - // copy when the # of samples in the cluster is 0 - if (map.value == 0) - return true; - else - return false; - }, - [=] __device__(raft::KeyValuePair map) { // map - return map.key; - }, + cuda::proclaim_return_type( + [=] __device__(raft::KeyValuePair map) { // predicate + // copy when the # of samples in the cluster is 0 + if (map.value == 0) + return true; + else + return false; + }), + cuda::proclaim_return_type( + [=] __device__(raft::KeyValuePair map) { // map + return map.key; + }), stream); // compute the squared norm between the newCentroids and the original @@ -657,10 +662,10 @@ void fit(const raft::handle_t& handle, raft::linalg::mapThenSumReduce( sqrdNorm.data_handle(), newCentroids.size(), - [=] __device__(const DataT a, const DataT b) { + cuda::proclaim_return_type([=] __device__(const DataT a, const DataT b) { DataT diff = a - b; return diff * diff; - }, + }), stream, centroids.data_handle(), newCentroids.data_handle()); @@ -680,13 +685,14 @@ void fit(const raft::handle_t& handle, minClusterAndDistance.view(), workspace, raft::make_device_scalar_view(clusterCostD.data()), - [] __device__(const raft::KeyValuePair& a, - const raft::KeyValuePair& b) { - raft::KeyValuePair res; - res.key = 0; - res.value = a.value + b.value; - return res; - }); + cuda::proclaim_return_type>( + [] __device__(const raft::KeyValuePair& a, + const raft::KeyValuePair& b) { + raft::KeyValuePair res; + res.key = 0; + res.value = a.value + b.value; + return res; + })); // Cluster cost phi_x(C) from all ranks comm.allreduce(&(clusterCostD.data()->value),