From 5d83412dc91708b1c4014c0bf76fc8bb723789d0 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 8 May 2024 21:46:41 +0000 Subject: [PATCH 01/31] proclaim return types for CCCL 2.4+ --- cpp/include/cugraph/utilities/mask_utils.cuh | 2 +- cpp/src/community/detail/common_methods.cuh | 2 +- cpp/src/community/legacy/louvain.cuh | 2 +- cpp/src/components/weakly_connected_components_impl.cuh | 4 ++-- cpp/src/detail/utility_wrappers.cu | 2 +- cpp/src/structure/graph_view_impl.cuh | 7 ++++--- cpp/tests/sampling/sampling_post_processing_test.cu | 4 ++-- 7 files changed, 12 insertions(+), 11 deletions(-) diff --git a/cpp/include/cugraph/utilities/mask_utils.cuh b/cpp/include/cugraph/utilities/mask_utils.cuh index 7b69ea3fe3a..8ff6b25aedc 100644 --- a/cpp/include/cugraph/utilities/mask_utils.cuh +++ b/cpp/include/cugraph/utilities/mask_utils.cuh @@ -160,7 +160,7 @@ size_t count_set_bits(raft::handle_t const& handle, MaskIterator mask_first, siz handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(packed_bool_size(num_bits)), - [mask_first, num_bits] __device__(size_t i) { + [mask_first, num_bits] __device__(size_t i) -> size_t { auto word = *(mask_first + i); if ((i + 1) * packed_bools_per_word() > num_bits) { word &= packed_bool_partial_mask(num_bits % packed_bools_per_word()); diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh index fe0a415db30..b8670496fed 100644 --- a/cpp/src/community/detail/common_methods.cuh +++ b/cpp/src/community/detail/common_methods.cuh @@ -178,7 +178,7 @@ weight_t compute_modularity( handle.get_thrust_policy(), cluster_weights.begin(), cluster_weights.end(), - [] __device__(weight_t p) { return p * p; }, + [] __device__(weight_t p) -> weight_t { return p * p; }, weight_t{0}, thrust::plus()); diff --git a/cpp/src/community/legacy/louvain.cuh b/cpp/src/community/legacy/louvain.cuh index 6cf5bbdc3c6..4c7ca3f1e2f 100644 --- a/cpp/src/community/legacy/louvain.cuh +++ b/cpp/src/community/legacy/louvain.cuh @@ -142,7 +142,7 @@ class Louvain { thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_vertices), [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__( - vertex_t community) { + vertex_t community) -> weight_t { return ((d_inc[community] / total_edge_weight) - resolution * (d_deg[community] * d_deg[community]) / (total_edge_weight * total_edge_weight)); diff --git a/cpp/src/components/weakly_connected_components_impl.cuh b/cpp/src/components/weakly_connected_components_impl.cuh index d4d6d842951..6c950fb93ec 100644 --- a/cpp/src/components/weakly_connected_components_impl.cuh +++ b/cpp/src/components/weakly_connected_components_impl.cuh @@ -400,7 +400,7 @@ void weakly_connected_components_impl(raft::handle_t const& handle, handle.get_thrust_policy(), new_root_candidates.begin(), new_root_candidates.begin() + (new_root_candidates.size() > 0 ? 1 : 0), - [vertex_partition, degrees = degrees.data()] __device__(auto v) { + [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t { return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)]; }, edge_t{0}, @@ -642,7 +642,7 @@ void weakly_connected_components_impl(raft::handle_t const& handle, handle.get_thrust_policy(), thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).begin().get_iterator_tuple()), thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).end().get_iterator_tuple()), - [vertex_partition, degrees = degrees.data()] __device__(auto v) { + [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t { return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)]; }, edge_t{0}, diff --git a/cpp/src/detail/utility_wrappers.cu b/cpp/src/detail/utility_wrappers.cu index 9100ecbd5e1..99a69fa00ee 100644 --- a/cpp/src/detail/utility_wrappers.cu +++ b/cpp/src/detail/utility_wrappers.cu @@ -139,7 +139,7 @@ vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view, rmm::exec_policy(stream_view), edge_first, edge_first + num_edges, - [] __device__(auto e) { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }, + [] __device__(auto e) -> vertex_t { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }, vertex_t{0}, thrust::maximum()); } diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 29dca6ef409..543b3478137 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -353,7 +353,7 @@ edge_t count_edge_partition_multi_edges( execution_policy, thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[2], thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[3], - [edge_partition] __device__(auto major) { + [edge_partition] __device__(auto major) -> edge_t { auto major_offset = edge_partition.major_offset_from_major_nocheck(major); vertex_t const* indices{nullptr}; [[maybe_unused]] edge_t edge_offset{}; @@ -374,7 +374,8 @@ edge_t count_edge_partition_multi_edges( execution_policy, thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(*(edge_partition.dcs_nzd_vertex_count())), - [edge_partition, major_start_offset = (*segment_offsets)[3]] __device__(auto idx) { + [edge_partition, + major_start_offset = (*segment_offsets)[3]] __device__(auto idx) -> edge_t { auto major_idx = major_start_offset + idx; // major_offset != major_idx in the hypersparse region vertex_t const* indices{nullptr}; @@ -398,7 +399,7 @@ edge_t count_edge_partition_multi_edges( thrust::make_counting_iterator(edge_partition.major_range_first()), thrust::make_counting_iterator(edge_partition.major_range_first()) + edge_partition.major_range_size(), - [edge_partition] __device__(auto major) { + [edge_partition] __device__(auto major) -> edge_t { auto major_offset = edge_partition.major_offset_from_major_nocheck(major); vertex_t const* indices{nullptr}; [[maybe_unused]] edge_t edge_offset{}; diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu index c87cc5b960b..58b0629ec39 100644 --- a/cpp/tests/sampling/sampling_post_processing_test.cu +++ b/cpp/tests/sampling/sampling_post_processing_test.cu @@ -402,7 +402,7 @@ bool check_renumber_map_invariants( raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), matching_renumbered_vertices = raft::device_span( matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t major) { + matching_renumbered_vertices.size())] __device__(vertex_t major) -> vertex_t { auto it = thrust::lower_bound( thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; @@ -418,7 +418,7 @@ bool check_renumber_map_invariants( raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), matching_renumbered_vertices = raft::device_span( matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t minor) { + matching_renumbered_vertices.size())] __device__(vertex_t minor) -> vertex_t { auto it = thrust::lower_bound( thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor); return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; From 658b71e9913cd0d85fe3d9a16b8e5a15bd35eb29 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 8 May 2024 22:07:50 +0000 Subject: [PATCH 02/31] hide RAFT pragma deprecation messages --- cpp/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index eb6f348b380..42ee6e91dcf 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -82,14 +82,14 @@ set(CUGRAPH_CXX_FLAGS "") set(CUGRAPH_CUDA_FLAGS "") if(CMAKE_COMPILER_IS_GNUCXX) - list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations) + list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS) endif(CMAKE_COMPILER_IS_GNUCXX) message("-- Building for GPU_ARCHS = ${CMAKE_CUDA_ARCHITECTURES}") list(APPEND CUGRAPH_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) -list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas=--disable-warnings) +list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS -Xptxas=--disable-warnings) list(APPEND CUGRAPH_CUDA_FLAGS -Xcompiler=-Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable) list(APPEND CUGRAPH_CUDA_FLAGS -Xfatbin=-compress-all) From c365372b968dd0a5ced94ce137e302c9bbe670eb Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 8 May 2024 22:22:33 +0000 Subject: [PATCH 03/31] use cuda::proclaim_return_type --- cpp/include/cugraph/utilities/mask_utils.cuh | 5 ++- cpp/src/community/detail/common_methods.cuh | 3 +- cpp/src/community/legacy/louvain.cuh | 15 +++++--- .../weakly_connected_components_impl.cuh | 15 +++++--- cpp/src/detail/utility_wrappers.cu | 4 +- cpp/src/structure/graph_view_impl.cuh | 37 ++++++++++--------- .../sampling/sampling_post_processing_test.cu | 23 ++++++------ 7 files changed, 57 insertions(+), 45 deletions(-) diff --git a/cpp/include/cugraph/utilities/mask_utils.cuh b/cpp/include/cugraph/utilities/mask_utils.cuh index 8ff6b25aedc..1d86eef0ed1 100644 --- a/cpp/include/cugraph/utilities/mask_utils.cuh +++ b/cpp/include/cugraph/utilities/mask_utils.cuh @@ -20,6 +20,7 @@ #include +#include #include #include #include @@ -160,13 +161,13 @@ size_t count_set_bits(raft::handle_t const& handle, MaskIterator mask_first, siz handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(packed_bool_size(num_bits)), - [mask_first, num_bits] __device__(size_t i) -> size_t { + cuda::proclaim_return_type([mask_first, num_bits] __device__(size_t i) -> size_t { auto word = *(mask_first + i); if ((i + 1) * packed_bools_per_word() > num_bits) { word &= packed_bool_partial_mask(num_bits % packed_bools_per_word()); } return static_cast(__popc(word)); - }, + }), size_t{0}, thrust::plus{}); } diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh index b8670496fed..dcad4e92b95 100644 --- a/cpp/src/community/detail/common_methods.cuh +++ b/cpp/src/community/detail/common_methods.cuh @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -178,7 +179,7 @@ weight_t compute_modularity( handle.get_thrust_policy(), cluster_weights.begin(), cluster_weights.end(), - [] __device__(weight_t p) -> weight_t { return p * p; }, + cuda::proclaim_return_type([] __device__(weight_t p) -> weight_t { return p * p; }), weight_t{0}, thrust::plus()); diff --git a/cpp/src/community/legacy/louvain.cuh b/cpp/src/community/legacy/louvain.cuh index 4c7ca3f1e2f..53d0b231c03 100644 --- a/cpp/src/community/legacy/louvain.cuh +++ b/cpp/src/community/legacy/louvain.cuh @@ -22,6 +22,7 @@ #include #include + #ifdef TIMING #include #endif @@ -29,6 +30,7 @@ #include #include +#include #include #include #include @@ -141,12 +143,13 @@ class Louvain { handle_.get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_vertices), - [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__( - vertex_t community) -> weight_t { - return ((d_inc[community] / total_edge_weight) - resolution * - (d_deg[community] * d_deg[community]) / - (total_edge_weight * total_edge_weight)); - }, + cuda::proclaim_return_type( + [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__( + vertex_t community) -> weight_t { + return ((d_inc[community] / total_edge_weight) - + resolution * (d_deg[community] * d_deg[community]) / + (total_edge_weight * total_edge_weight)); + }), weight_t{0.0}, thrust::plus()); diff --git a/cpp/src/components/weakly_connected_components_impl.cuh b/cpp/src/components/weakly_connected_components_impl.cuh index 6c950fb93ec..f63f28210d8 100644 --- a/cpp/src/components/weakly_connected_components_impl.cuh +++ b/cpp/src/components/weakly_connected_components_impl.cuh @@ -34,6 +34,7 @@ #include +#include #include #include #include @@ -400,9 +401,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle, handle.get_thrust_policy(), new_root_candidates.begin(), new_root_candidates.begin() + (new_root_candidates.size() > 0 ? 1 : 0), - [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t { - return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)]; - }, + cuda::proclaim_return_type( + [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t { + return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)]; + }), edge_t{0}, thrust::plus{}); @@ -642,9 +644,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle, handle.get_thrust_policy(), thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).begin().get_iterator_tuple()), thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).end().get_iterator_tuple()), - [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t { - return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)]; - }, + cuda::proclaim_return_type( + [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t { + return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)]; + }), edge_t{0}, thrust::plus()); diff --git a/cpp/src/detail/utility_wrappers.cu b/cpp/src/detail/utility_wrappers.cu index 99a69fa00ee..6d6158a16e7 100644 --- a/cpp/src/detail/utility_wrappers.cu +++ b/cpp/src/detail/utility_wrappers.cu @@ -21,6 +21,7 @@ #include +#include #include #include #include @@ -139,7 +140,8 @@ vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view, rmm::exec_policy(stream_view), edge_first, edge_first + num_edges, - [] __device__(auto e) -> vertex_t { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }, + cuda::proclaim_return_type( + [] __device__(auto e) -> vertex_t { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }), vertex_t{0}, thrust::maximum()); } diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 543b3478137..7097349dce5 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -353,7 +353,7 @@ edge_t count_edge_partition_multi_edges( execution_policy, thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[2], thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[3], - [edge_partition] __device__(auto major) -> edge_t { + cuda::proclaim_return_type([edge_partition] __device__(auto major) -> edge_t { auto major_offset = edge_partition.major_offset_from_major_nocheck(major); vertex_t const* indices{nullptr}; [[maybe_unused]] edge_t edge_offset{}; @@ -365,7 +365,7 @@ edge_t count_edge_partition_multi_edges( if (indices[i - 1] == indices[i]) { ++count; } } return count; - }, + }), edge_t{0}, thrust::plus{}); } @@ -374,20 +374,21 @@ edge_t count_edge_partition_multi_edges( execution_policy, thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(*(edge_partition.dcs_nzd_vertex_count())), - [edge_partition, - major_start_offset = (*segment_offsets)[3]] __device__(auto idx) -> edge_t { - auto major_idx = - major_start_offset + idx; // major_offset != major_idx in the hypersparse region - vertex_t const* indices{nullptr}; - [[maybe_unused]] edge_t edge_offset{}; - edge_t local_degree{}; - thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx); - edge_t count{0}; - for (edge_t i = 1; i < local_degree; ++i) { // assumes neighbors are sorted - if (indices[i - 1] == indices[i]) { ++count; } - } - return count; - }, + cuda::proclaim_return_type( + [edge_partition, + major_start_offset = (*segment_offsets)[3]] __device__(auto idx) -> edge_t { + auto major_idx = + major_start_offset + idx; // major_offset != major_idx in the hypersparse region + vertex_t const* indices{nullptr}; + [[maybe_unused]] edge_t edge_offset{}; + edge_t local_degree{}; + thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx); + edge_t count{0}; + for (edge_t i = 1; i < local_degree; ++i) { // assumes neighbors are sorted + if (indices[i - 1] == indices[i]) { ++count; } + } + return count; + }), edge_t{0}, thrust::plus{}); } @@ -399,7 +400,7 @@ edge_t count_edge_partition_multi_edges( thrust::make_counting_iterator(edge_partition.major_range_first()), thrust::make_counting_iterator(edge_partition.major_range_first()) + edge_partition.major_range_size(), - [edge_partition] __device__(auto major) -> edge_t { + cuda::proclaim_return_type([edge_partition] __device__(auto major) -> edge_t { auto major_offset = edge_partition.major_offset_from_major_nocheck(major); vertex_t const* indices{nullptr}; [[maybe_unused]] edge_t edge_offset{}; @@ -410,7 +411,7 @@ edge_t count_edge_partition_multi_edges( if (indices[i - 1] == indices[i]) { ++count; } } return count; - }, + }), edge_t{0}, thrust::plus{}); } diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu index 58b0629ec39..5e21825901a 100644 --- a/cpp/tests/sampling/sampling_post_processing_test.cu +++ b/cpp/tests/sampling/sampling_post_processing_test.cu @@ -398,15 +398,16 @@ bool check_renumber_map_invariants( handle.get_thrust_policy(), unique_majors.begin(), unique_majors.end(), - [sorted_org_vertices = - raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t major) -> vertex_t { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - }, + cuda::proclaim_return_type( + [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), + sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t major) -> vertex_t { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; + }), std::numeric_limits::lowest(), thrust::maximum{}); @@ -414,7 +415,7 @@ bool check_renumber_map_invariants( handle.get_thrust_policy(), unique_minors.begin(), unique_minors.end(), - [sorted_org_vertices = + cuda::proclaim_return_type([sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), matching_renumbered_vertices = raft::device_span( matching_renumbered_vertices.data(), @@ -422,7 +423,7 @@ bool check_renumber_map_invariants( auto it = thrust::lower_bound( thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor); return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - }, + }), std::numeric_limits::max(), thrust::minimum{}); From cbca1404b488cf33b796328a0add1f2d639c4430 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 8 May 2024 22:38:41 +0000 Subject: [PATCH 04/31] fix lint --- .../sampling/sampling_post_processing_test.cu | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu index 5e21825901a..3bca382a2eb 100644 --- a/cpp/tests/sampling/sampling_post_processing_test.cu +++ b/cpp/tests/sampling/sampling_post_processing_test.cu @@ -415,15 +415,16 @@ bool check_renumber_map_invariants( handle.get_thrust_policy(), unique_minors.begin(), unique_minors.end(), - cuda::proclaim_return_type([sorted_org_vertices = - raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t minor) -> vertex_t { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - }), + cuda::proclaim_return_type( + [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), + sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t minor) -> vertex_t { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; + }), std::numeric_limits::max(), thrust::minimum{}); From 423343998b0c9f7735d1eed0513dcdebbdcc8625 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 8 May 2024 23:13:18 +0000 Subject: [PATCH 05/31] add kv_store_t overload for thrust::tuple construction changes --- .../cugraph/utilities/device_functors.cuh | 7 +++--- cpp/src/prims/kv_store.cuh | 25 +++++++++++++++++++ cpp/tests/CMakeLists.txt | 2 +- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh index 3af8ed1dd19..7c2480dcd74 100644 --- a/cpp/include/cugraph/utilities/device_functors.cuh +++ b/cpp/include/cugraph/utilities/device_functors.cuh @@ -78,13 +78,14 @@ struct indirection_t { template struct indirection_if_idx_valid_t { + using value_type = typename thrust::iterator_traits::value_type; Iterator first{}; index_t invalid_idx{}; - typename thrust::iterator_traits::value_type invalid_value{}; + value_type invalid_value{}; - __device__ typename thrust::iterator_traits::value_type operator()(index_t i) const + __device__ value_type operator()(index_t i) const { - return (i != invalid_idx) ? *(first + i) : invalid_value; + return (i != invalid_idx) ? static_cast(*(first + i)) : invalid_value; } }; diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh index 2cc7856d87a..088d3efa51b 100644 --- a/cpp/src/prims/kv_store.cuh +++ b/cpp/src/prims/kv_store.cuh @@ -17,6 +17,7 @@ #include "prims/detail/optional_dataframe_buffer.hpp" +#include #include #include @@ -944,6 +945,30 @@ class kv_store_t { { } + /* when use_binary_search = true */ + template + kv_store_t(rmm::device_uvector&& keys, + decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{}))&& values, + decltype(cugraph::invalid_idx::value) + invalid_value /* invalid_value is returned when match fails for the given key */, + bool key_sorted /* if set to true, assume that the input data is sorted and skip + sorting (which is necessary for binary-search) */ + , + rmm::cuda_stream_view stream, + std::enable_if_t::value, int32_t> = 0) + : store_( + std::move(keys), + std::move(values), + [=]() { + auto invalid_row = value_t{}; + cuda::std::get<0>(invalid_row) = invalid_value; + return invalid_row; + }(), + key_sorted, + stream) + { + } + /* when use binary_search = false, this requires that the capacity is large enough */ template std::enable_if_t insert(KeyIterator key_first, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 2dcda796f9c..35e7b144f6a 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -661,7 +661,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG PRIMS EXTRACT_TRANSFORM_E tests -------------------------------------------------------- - ConfigureTestMG(MG_EXTRACT_TRANSFORM_E_TEST prims/mg_extract_transform_e.cu) + # ConfigureTestMG(MG_EXTRACT_TRANSFORM_E_TEST prims/mg_extract_transform_e.cu) ############################################################################################### # - MG PRIMS EXTRACT_TRANSFORM_V_FRONTIER_OUTGOING_E tests ------------------------------------ From d8a5733202333b84be2b69dd4caf084af6b20c1e Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 9 May 2024 00:25:35 +0000 Subject: [PATCH 06/31] fix lint --- cpp/include/cugraph/utilities/device_functors.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh index 7c2480dcd74..20cf98f7e6d 100644 --- a/cpp/include/cugraph/utilities/device_functors.cuh +++ b/cpp/include/cugraph/utilities/device_functors.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 87a1d3d473af372766d1b606ba0be7bf2562085f Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 9 May 2024 03:26:01 +0000 Subject: [PATCH 07/31] use thrust::get --- cpp/src/prims/kv_store.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh index 088d3efa51b..93b990bb8a1 100644 --- a/cpp/src/prims/kv_store.cuh +++ b/cpp/src/prims/kv_store.cuh @@ -960,8 +960,8 @@ class kv_store_t { std::move(keys), std::move(values), [=]() { - auto invalid_row = value_t{}; - cuda::std::get<0>(invalid_row) = invalid_value; + auto invalid_row = value_t{}; + thrust::get<0>(invalid_row) = invalid_value; return invalid_row; }(), key_sorted, From 3aaedf4a4fcbbb4c22018483f78c7d8859879f80 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 9 May 2024 13:30:44 -0700 Subject: [PATCH 08/31] define cxx and cuda standards --- cpp/tests/CMakeLists.txt | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 35e7b144f6a..cc4a16b23b7 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -169,7 +169,11 @@ function(ConfigureTest CMAKE_TEST_NAME) ) set_target_properties( ${CMAKE_TEST_NAME} - PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib") + PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON) rapids_test_add( NAME ${CMAKE_TEST_NAME} @@ -195,7 +199,11 @@ function(ConfigureTestMG CMAKE_TEST_NAME) ) set_target_properties( ${CMAKE_TEST_NAME} - PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib") + PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON) rapids_test_add( NAME ${CMAKE_TEST_NAME} @@ -241,7 +249,11 @@ function(ConfigureCTest CMAKE_TEST_NAME) ) set_target_properties( ${CMAKE_TEST_NAME} - PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib") + PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON) rapids_test_add( NAME ${CMAKE_TEST_NAME} @@ -269,7 +281,11 @@ function(ConfigureCTestMG CMAKE_TEST_NAME) ) set_target_properties( ${CMAKE_TEST_NAME} - PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib") + PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON) rapids_test_add( NAME ${CMAKE_TEST_NAME} From f5b1e77b4b960336037bb0d982ccb683755ce052 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 9 May 2024 20:30:51 +0000 Subject: [PATCH 09/31] update devcontainer workflow to use NVIDIA/cccl#pull-request/1667 --- .github/workflows/pr.yaml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index c04e0e879d2..742a407514d 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -195,6 +195,21 @@ jobs: node_type: cpu32 extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY build_command: | + # Tell rapids-cmake to use custom CCCL and cuCollections forks + rapids_branch="$(yq '.x-git-defaults.tag' /opt/rapids-build-utils/manifest.yaml)"; + rapids_version="${rapids_branch#branch-}"; + curl -fsSL -o- https://raw.githubusercontent.com/trxcllnt/rapids-cmake/branch-24.04-cccl-2.4.0/rapids-cmake/cpm/patches/cccl/revert_pr_211.diff \ + | tee ~/rapids-cmake-revert_pr_211.diff; + curl -fsSL -o- "https://raw.githubusercontent.com/rapidsai/rapids-cmake/${rapids_branch}/rapids-cmake/cpm/versions.json" \ + | jq -r ".packages.CCCL *= {\"version\": \"2.5.0\", \"git_tag\": \"pull-request/1667\"}" \ + | jq -r "(.packages.CCCL.patches[] | select(.file == \"cccl/revert_pr_211.diff\")).file = \"${HOME}/rapids-cmake-revert_pr_211.diff\"" \ + | jq -r ".packages.cuco *= {\"git_url\": \"https://github.com/trxcllnt/cuCollections.git\", \"git_tag\": \"rapids-${rapids_version}-cccl-2.5.0\", \"always_download\": true}" \ + | tee ~/rapids-cmake-override-versions.json; sccache -z; - build-all --verbose -j$(nproc --ignore=1); + build-all \ + -j$(nproc --ignore=1) -v \ + -DBUILD_CUGRAPH_MG_TESTS=ON \ + -DCMAKE_CXX_FLAGS="-ftemplate-backtrace-limit=0" \ + -DCMAKE_CUDA_FLAGS="-ftemplate-backtrace-limit=0" \ + -DRAPIDS_CMAKE_CPM_DEFAULT_VERSION_FILE="${HOME}/rapids-cmake-override-versions.json"; sccache -s; From 7ed06f3d5ae2f9c6a775c8c9e8498b2f531cffc3 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 9 May 2024 20:31:07 +0000 Subject: [PATCH 10/31] uncomment failing test --- cpp/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index cc4a16b23b7..7e03e24477e 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -677,7 +677,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG PRIMS EXTRACT_TRANSFORM_E tests -------------------------------------------------------- - # ConfigureTestMG(MG_EXTRACT_TRANSFORM_E_TEST prims/mg_extract_transform_e.cu) + ConfigureTestMG(MG_EXTRACT_TRANSFORM_E_TEST prims/mg_extract_transform_e.cu) ############################################################################################### # - MG PRIMS EXTRACT_TRANSFORM_V_FRONTIER_OUTGOING_E tests ------------------------------------ From 499aa5e9a3deb17b21c994366543f404ee659908 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 9 May 2024 21:04:53 +0000 Subject: [PATCH 11/31] add multi-gpu dependencies to pip devcontainer --- .devcontainer/Dockerfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 3d0ac075be3..3ec0a500541 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -7,6 +7,11 @@ FROM ${BASE} as pip-base ENV DEFAULT_VIRTUAL_ENV=rapids +RUN apt update -y \ + && DEBIAN_FRONTEND=noninteractive apt install -y \ + libblas-dev liblapack-dev libopenmpi-dev \ + && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*; + FROM ${BASE} as conda-base ENV DEFAULT_CONDA_ENV=rapids From b613e435e80a0472e1752a13cea5c43b1a1ecb89 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 13 May 2024 13:08:28 -0700 Subject: [PATCH 12/31] extract_transform_e's e_op can't take a tagged key, fix the MG tests --- cpp/tests/prims/mg_extract_transform_e.cu | 109 +++++++--------------- 1 file changed, 32 insertions(+), 77 deletions(-) diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu index 48b893f6fea..1a9a8660078 100644 --- a/cpp/tests/prims/mg_extract_transform_e.cu +++ b/cpp/tests/prims/mg_extract_transform_e.cu @@ -59,55 +59,27 @@ #include #include -template +template struct e_op_t { - static_assert(std::is_same_v || - std::is_same_v>); static_assert(std::is_same_v || std::is_same_v>); - using return_type = thrust::optional, - std::conditional_t, - thrust::tuple, - thrust::tuple>, - std::conditional_t, - thrust::tuple, - thrust::tuple>>>; - - __device__ return_type operator()(key_t optionally_tagged_src, - vertex_t dst, - property_t src_val, - property_t dst_val, - thrust::nullopt_t) const + using return_type = + thrust::optional, + thrust::tuple, + thrust::tuple>>; + + __device__ return_type operator()( + vertex_t src, vertex_t dst, property_t src_val, property_t dst_val, thrust::nullopt_t) const { auto output_payload = static_cast(1); if (src_val < dst_val) { - if constexpr (std::is_same_v) { - if constexpr (std::is_arithmetic_v) { - return thrust::make_tuple(optionally_tagged_src, dst, output_payload); - } else { - static_assert(thrust::tuple_size::value == size_t{2}); - return thrust::make_tuple(optionally_tagged_src, - dst, - thrust::get<0>(output_payload), - thrust::get<1>(output_payload)); - } + if constexpr (std::is_arithmetic_v) { + return thrust::make_tuple(src, dst, output_payload); } else { - static_assert(thrust::tuple_size::value == size_t{2}); - if constexpr (std::is_arithmetic_v) { - return thrust::make_tuple(thrust::get<0>(optionally_tagged_src), - thrust::get<1>(optionally_tagged_src), - dst, - output_payload); - } else { - static_assert(thrust::tuple_size::value == size_t{2}); - return thrust::make_tuple(thrust::get<0>(optionally_tagged_src), - thrust::get<1>(optionally_tagged_src), - dst, - thrust::get<0>(output_payload), - thrust::get<1>(output_payload)); - } + static_assert(thrust::tuple_size::value == size_t{2}); + return thrust::make_tuple( + src, dst, thrust::get<0>(output_payload), thrust::get<1>(output_payload)); } } else { return thrust::nullopt; @@ -134,19 +106,11 @@ class Tests_MGExtractTransformE virtual void TearDown() {} // Compare the results of extract_transform_e primitive - template + template void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) { using result_t = int32_t; - using key_t = - std::conditional_t, vertex_t, thrust::tuple>; - - static_assert(std::is_same_v || std::is_arithmetic_v); static_assert(std::is_same_v || cugraph::is_arithmetic_or_thrust_tuple_of_arithmetic::value); if constexpr (cugraph::is_thrust_tuple::value) { @@ -212,7 +176,7 @@ class Tests_MGExtractTransformE mg_src_prop.view(), mg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), - e_op_t{}); + e_op_t{}); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -225,7 +189,7 @@ class Tests_MGExtractTransformE if (prims_usecase.check_correctness) { auto mg_aggregate_extract_transform_output_buffer = cugraph::allocate_dataframe_buffer< - typename e_op_t::return_type::value_type>( + typename e_op_t::return_type::value_type>( size_t{0}, handle_->get_stream()); std::get<0>(mg_aggregate_extract_transform_output_buffer) = cugraph::test::device_gatherv(*handle_, @@ -239,18 +203,12 @@ class Tests_MGExtractTransformE cugraph::test::device_gatherv(*handle_, std::get<2>(mg_extract_transform_output_buffer).data(), std::get<2>(mg_extract_transform_output_buffer).size()); - if constexpr (!std::is_same_v || !std::is_arithmetic_v) { + if constexpr (!std::is_arithmetic_v) { std::get<3>(mg_aggregate_extract_transform_output_buffer) = cugraph::test::device_gatherv(*handle_, std::get<3>(mg_extract_transform_output_buffer).data(), std::get<3>(mg_extract_transform_output_buffer).size()); } - if constexpr (!std::is_same_v && !std::is_arithmetic_v) { - std::get<4>(mg_aggregate_extract_transform_output_buffer) = - cugraph::test::device_gatherv(*handle_, - std::get<4>(mg_extract_transform_output_buffer).data(), - std::get<4>(mg_extract_transform_output_buffer).size()); - } cugraph::graph_t sg_graph(*handle_); std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( @@ -290,7 +248,7 @@ class Tests_MGExtractTransformE sg_src_prop.view(), sg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), - e_op_t{}); + e_op_t{}); thrust::sort(handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(sg_extract_transform_output_buffer), @@ -319,13 +277,13 @@ using Tests_MGExtractTransformE_Rmat = Tests_MGExtractTransformE(std::get<0>(param), std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidInt32) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -333,14 +291,14 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidInt32) TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatVoidTupleFloatInt32) { auto param = GetParam(); - run_current_test>( - std::get<0>(param), std::get<1>(param)); + run_current_test>(std::get<0>(param), + std::get<1>(param)); } TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidTupleFloatInt32) { auto param = GetParam(); - run_current_test>( + run_current_test>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -348,14 +306,13 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidTupleFloatInt32) TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatInt32Int32) { auto param = GetParam(); - run_current_test(std::get<0>(param), - std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32Int32) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -363,14 +320,14 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32Int32) TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatInt32TupleFloatInt32) { auto param = GetParam(); - run_current_test>( - std::get<0>(param), std::get<1>(param)); + run_current_test>(std::get<0>(param), + std::get<1>(param)); } TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32TupleFloatInt32) { auto param = GetParam(); - run_current_test>( + run_current_test>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -378,14 +335,13 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32TupleFloatInt32) TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int64FloatInt32Int32) { auto param = GetParam(); - run_current_test(std::get<0>(param), - std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int64FloatInt32Int32) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -393,14 +349,13 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int64FloatInt32Int32) TEST_P(Tests_MGExtractTransformE_File, CheckInt64Int64FloatInt32Int32) { auto param = GetParam(); - run_current_test(std::get<0>(param), - std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt64Int64FloatInt32Int32) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } From 23d5cf44f9e392d27ce19874dae8546a50b9b949 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Mon, 13 May 2024 21:21:01 +0000 Subject: [PATCH 13/31] don't wrap an exec policy in another exec policy --- cpp/src/traversal/bfs_impl.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/traversal/bfs_impl.cuh b/cpp/src/traversal/bfs_impl.cuh index 1f6f29d8683..f144599b777 100644 --- a/cpp/src/traversal/bfs_impl.cuh +++ b/cpp/src/traversal/bfs_impl.cuh @@ -149,11 +149,11 @@ void bfs(raft::handle_t const& handle, auto constexpr invalid_distance = std::numeric_limits::max(); auto constexpr invalid_vertex = invalid_vertex_id::value; - thrust::fill(rmm::exec_policy(handle.get_thrust_policy()), + thrust::fill(handle.get_thrust_policy(), distances, distances + push_graph_view.local_vertex_partition_range_size(), invalid_distance); - thrust::fill(rmm::exec_policy(handle.get_thrust_policy()), + thrust::fill(handle.get_thrust_policy(), predecessor_first, predecessor_first + push_graph_view.local_vertex_partition_range_size(), invalid_vertex); @@ -161,7 +161,7 @@ void bfs(raft::handle_t const& handle, push_graph_view.local_vertex_partition_view()); if (n_sources) { thrust::for_each( - rmm::exec_policy(handle.get_thrust_policy()), + handle.get_thrust_policy(), sources, sources + n_sources, [vertex_partition, distances, predecessor_first] __device__(auto v) { From 5edef0cd2d81e648cb810add7e31a7a6bac24ca7 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Tue, 14 May 2024 21:53:41 +0000 Subject: [PATCH 14/31] test rapids-cmake with CCCL 2.5 --- rapids_config.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rapids_config.cmake b/rapids_config.cmake index 50b1054b7b9..06784f6f8bd 100644 --- a/rapids_config.cmake +++ b/rapids_config.cmake @@ -25,6 +25,9 @@ else() "Could not determine RAPIDS version. Contents of VERSION file:\n${_rapids_version_formatted}") endif() +set(rapids-cmake-repo trxcllnt/rapids-cmake) +set(rapids-cmake-branch fea/cccl-2.5) + if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/CUGRAPH_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake") file( DOWNLOAD From c3e8547bdcce075aa271e637b3525dde8b280488 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Tue, 14 May 2024 21:53:46 +0000 Subject: [PATCH 15/31] revert changes to pr.yaml --- .github/workflows/pr.yaml | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 742a407514d..5733646a8b9 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -195,21 +195,6 @@ jobs: node_type: cpu32 extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY build_command: | - # Tell rapids-cmake to use custom CCCL and cuCollections forks - rapids_branch="$(yq '.x-git-defaults.tag' /opt/rapids-build-utils/manifest.yaml)"; - rapids_version="${rapids_branch#branch-}"; - curl -fsSL -o- https://raw.githubusercontent.com/trxcllnt/rapids-cmake/branch-24.04-cccl-2.4.0/rapids-cmake/cpm/patches/cccl/revert_pr_211.diff \ - | tee ~/rapids-cmake-revert_pr_211.diff; - curl -fsSL -o- "https://raw.githubusercontent.com/rapidsai/rapids-cmake/${rapids_branch}/rapids-cmake/cpm/versions.json" \ - | jq -r ".packages.CCCL *= {\"version\": \"2.5.0\", \"git_tag\": \"pull-request/1667\"}" \ - | jq -r "(.packages.CCCL.patches[] | select(.file == \"cccl/revert_pr_211.diff\")).file = \"${HOME}/rapids-cmake-revert_pr_211.diff\"" \ - | jq -r ".packages.cuco *= {\"git_url\": \"https://github.com/trxcllnt/cuCollections.git\", \"git_tag\": \"rapids-${rapids_version}-cccl-2.5.0\", \"always_download\": true}" \ - | tee ~/rapids-cmake-override-versions.json; sccache -z; - build-all \ - -j$(nproc --ignore=1) -v \ - -DBUILD_CUGRAPH_MG_TESTS=ON \ - -DCMAKE_CXX_FLAGS="-ftemplate-backtrace-limit=0" \ - -DCMAKE_CUDA_FLAGS="-ftemplate-backtrace-limit=0" \ - -DRAPIDS_CMAKE_CPM_DEFAULT_VERSION_FILE="${HOME}/rapids-cmake-override-versions.json"; + build-all --verbose -j$(nproc --ignore=1) -DBUILD_CUGRAPH_MG_TESTS=ON; sccache -s; From 1928e98a974def187bd8da9a74fe4475a2974f9a Mon Sep 17 00:00:00 2001 From: ptaylor Date: Tue, 21 May 2024 22:48:59 +0000 Subject: [PATCH 16/31] install ucx feature after cuda --- .devcontainer/cuda11.8-pip/devcontainer.json | 8 ++++---- .devcontainer/cuda12.2-pip/devcontainer.json | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index a4dc168505b..9b71d9bce92 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -15,9 +15,6 @@ ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": { - "version": "1.15.0" - }, "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": { "version": "11.8", "installcuBLAS": true, @@ -25,11 +22,14 @@ "installcuRAND": true, "installcuSPARSE": true }, + "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": { + "version": "1.15.0" + }, "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ - "ghcr.io/rapidsai/devcontainers/features/ucx", "ghcr.io/rapidsai/devcontainers/features/cuda", + "ghcr.io/rapidsai/devcontainers/features/ucx", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"], diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json index 393a5c63d23..577887c50c8 100644 --- a/.devcontainer/cuda12.2-pip/devcontainer.json +++ b/.devcontainer/cuda12.2-pip/devcontainer.json @@ -15,9 +15,6 @@ ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": { - "version": "1.15.0" - }, "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": { "version": "12.2", "installcuBLAS": true, @@ -25,11 +22,14 @@ "installcuRAND": true, "installcuSPARSE": true }, + "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": { + "version": "1.15.0" + }, "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ - "ghcr.io/rapidsai/devcontainers/features/ucx", "ghcr.io/rapidsai/devcontainers/features/cuda", + "ghcr.io/rapidsai/devcontainers/features/ucx", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.2-venvs}"], From 603e839729dcc2fc723f3e958dda85f7a22323d0 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 23 May 2024 17:21:32 +0000 Subject: [PATCH 17/31] use devcontainers with ucx and openmpi prebuilt --- .devcontainer/Dockerfile | 2 +- .devcontainer/cuda11.8-pip/devcontainer.json | 6 +----- .devcontainer/cuda12.2-pip/devcontainer.json | 6 +----- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 3ec0a500541..190003dd7af 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -9,7 +9,7 @@ ENV DEFAULT_VIRTUAL_ENV=rapids RUN apt update -y \ && DEBIAN_FRONTEND=noninteractive apt install -y \ - libblas-dev liblapack-dev libopenmpi-dev \ + libblas-dev liblapack-dev \ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*; FROM ${BASE} as conda-base diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 9b71d9bce92..2c7b578c044 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ucx1.15.0-openmpi-ubuntu22.04" } }, "runArgs": [ @@ -22,14 +22,10 @@ "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": { - "version": "1.15.0" - }, "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/cuda", - "ghcr.io/rapidsai/devcontainers/features/ucx", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"], diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json index 577887c50c8..c7b0585ea61 100644 --- a/.devcontainer/cuda12.2-pip/devcontainer.json +++ b/.devcontainer/cuda12.2-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.2", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ucx1.15.0-openmpi-ubuntu22.04" } }, "runArgs": [ @@ -22,14 +22,10 @@ "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": { - "version": "1.15.0" - }, "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/cuda", - "ghcr.io/rapidsai/devcontainers/features/ucx", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.2-venvs}"], From afbee0a366df4cda2ea8623a8915924d3ff6637c Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Tue, 21 May 2024 21:24:18 -0400 Subject: [PATCH 18/31] DOC: doc-update-link-for-cugraphops (#4279) Fixes a broken link https://github.com/rapidsai/cugraph-ops/blob/branch-23.04/README.md -> https://github.com/rapidsai/cugraph/blob/branch-24.04/readme_pages/cugraph_ops.md Authors: - Ray Bell (https://github.com/raybellwaves) - Alex Barghi (https://github.com/alexbarghi-nv) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Don Acosta (https://github.com/acostadon) URL: https://github.com/rapidsai/cugraph/pull/4279 --- docs/cugraph/source/graph_support/cugraphops_support.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cugraph/source/graph_support/cugraphops_support.rst b/docs/cugraph/source/graph_support/cugraphops_support.rst index fd79564f849..96b13f62a9c 100644 --- a/docs/cugraph/source/graph_support/cugraphops_support.rst +++ b/docs/cugraph/source/graph_support/cugraphops_support.rst @@ -7,4 +7,4 @@ cugraph-ops aims to be a low-level, framework agnostic library providing commonl .. toctree:: :maxdepth: 3 - https://github.com/rapidsai/cugraph-ops/blob/branch-23.04/README.md + https://github.com/rapidsai/cugraph/blob/branch-24.06/readme_pages/cugraph_ops.md From 47972744813e6b6fba3d40599e0ea2dce6c55741 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 23 May 2024 12:56:40 -0700 Subject: [PATCH 19/31] fix devcontainer name for codespaces --- .devcontainer/cuda11.8-conda/devcontainer.json | 2 +- .devcontainer/cuda11.8-pip/devcontainer.json | 2 +- .devcontainer/cuda12.2-conda/devcontainer.json | 2 +- .devcontainer/cuda12.2-pip/devcontainer.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 7c9cd0258a4..d878f2d6584 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 2c7b578c044..a0edcb27df8 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { diff --git a/.devcontainer/cuda12.2-conda/devcontainer.json b/.devcontainer/cuda12.2-conda/devcontainer.json index eae4967f3b2..8a095d9b934 100644 --- a/.devcontainer/cuda12.2-conda/devcontainer.json +++ b/.devcontainer/cuda12.2-conda/devcontainer.json @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json index c7b0585ea61..10436f8b28d 100644 --- a/.devcontainer/cuda12.2-pip/devcontainer.json +++ b/.devcontainer/cuda12.2-pip/devcontainer.json @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { From 34f61381ba724c0944ee83a7b94bad025ef1b95c Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 23 May 2024 16:04:39 -0700 Subject: [PATCH 20/31] use trxcllnt/cudf#fix/cccl-2.5 branch when building libcudf from source --- cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake b/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake index 8d57bf570bb..3bba81ce415 100644 --- a/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake +++ b/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake @@ -42,6 +42,6 @@ set(CUGRAPH_ETL_BRANCH_VERSION_cudf "${CUGRAPH_ETL_VERSION_MAJOR}.${CUGRAPH_ETL_ # To use a different RAFT locally, set the CMake variable # RPM_cudf_SOURCE=/path/to/local/cudf find_and_configure_cudf(VERSION ${CUGRAPH_ETL_MIN_VERSION_cudf} - FORK rapidsai - PINNED_TAG branch-${CUGRAPH_ETL_BRANCH_VERSION_cudf} + FORK trxcllnt + PINNED_TAG fix/cccl-2.5 ) From 47be146d6465fdfdd254ca0c2053381d69ee2141 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 23 May 2024 23:11:36 +0000 Subject: [PATCH 21/31] fix lint --- cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake b/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake index 3bba81ce415..f8020296381 100644 --- a/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake +++ b/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From c8f66e40b4f83614bffdf99b74926615bd7938d8 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 24 May 2024 13:55:52 +0000 Subject: [PATCH 22/31] make similar changes as in https://github.com/rapidsai/cugraph/pull/4436 to resolve wheel test failures --- ci/test_wheel_cugraph-dgl.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh index 827ad487115..046265f2bd2 100755 --- a/ci/test_wheel_cugraph-dgl.sh +++ b/ci/test_wheel_cugraph-dgl.sh @@ -33,7 +33,10 @@ PYTORCH_URL="https://download.pytorch.org/whl/cu${PYTORCH_CUDA_VER}" DGL_URL="https://data.dgl.ai/wheels/cu${PYTORCH_CUDA_VER}/repo.html" rapids-logger "Installing PyTorch and DGL" -rapids-retry python -m pip install torch --index-url ${PYTORCH_URL} +rapids-retry python -m pip install --no-cache-dir torch --index-url ${PYTORCH_URL} rapids-retry python -m pip install dgl==2.0.0 --find-links ${DGL_URL} +python -m pip uninstall -y torch torchvision torchaudio +python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118 + python -m pytest python/cugraph-dgl/tests From 476a24c7bff3fd16d2cb2a092a6529bbf0650933 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 24 May 2024 14:08:00 +0000 Subject: [PATCH 23/31] add cuda ver --- ci/test_wheel_cugraph-dgl.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh index 046265f2bd2..6713b032a93 100755 --- a/ci/test_wheel_cugraph-dgl.sh +++ b/ci/test_wheel_cugraph-dgl.sh @@ -34,9 +34,8 @@ DGL_URL="https://data.dgl.ai/wheels/cu${PYTORCH_CUDA_VER}/repo.html" rapids-logger "Installing PyTorch and DGL" rapids-retry python -m pip install --no-cache-dir torch --index-url ${PYTORCH_URL} -rapids-retry python -m pip install dgl==2.0.0 --find-links ${DGL_URL} +rapids-retry python -m pip install --no-cache-dir dgl==2.0.0 --find-links ${DGL_URL} -python -m pip uninstall -y torch torchvision torchaudio -python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118 +python -m pip install -U --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu${PYTORCH_CUDA_VER} python -m pytest python/cugraph-dgl/tests From 96633db82b9a70facbdc0120260e79f283d24a3b Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 24 May 2024 16:50:57 +0000 Subject: [PATCH 24/31] limit CI parallelism to n_cpus - 1 --- .github/workflows/pr.yaml | 2 ++ ci/build_wheel.sh | 1 + 2 files changed, 3 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 5733646a8b9..adbea3b799d 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -44,6 +44,8 @@ jobs: needs: checks secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.06 + env: + PARALLEL_LEVEL: "31" with: build_type: pull-request node_type: cpu32 diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 587c5fb38e7..1e8491d67be 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -56,6 +56,7 @@ fi cd "${package_dir}" +CMAKE_BUILD_PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \ python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check # pure-python packages should be marked as pure, and not have auditwheel run on them. From 4f5c543e272e339522cf638957720f26f202204e Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 24 May 2024 17:20:33 +0000 Subject: [PATCH 25/31] move env to top level --- .github/workflows/pr.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index adbea3b799d..1632431a8a8 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -9,6 +9,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true +env: + PARALLEL_LEVEL: "31" + jobs: pr-builder: needs: @@ -44,8 +47,6 @@ jobs: needs: checks secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.06 - env: - PARALLEL_LEVEL: "31" with: build_type: pull-request node_type: cpu32 From feb4b2ceae056cfbcbbbedd4b4b875aaab4081fb Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 24 May 2024 18:34:28 +0000 Subject: [PATCH 26/31] add PARALLEL_LEVEL to ci/build_{cpp,wheel}.sh --- .github/workflows/pr.yaml | 3 --- ci/build_cpp.sh | 1 + ci/build_wheel.sh | 1 + 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 1632431a8a8..5733646a8b9 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -9,9 +9,6 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true -env: - PARALLEL_LEVEL: "31" - jobs: pr-builder: needs: diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index 132231e4a64..33e13392732 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -17,6 +17,7 @@ version=$(rapids-generate-version) rapids-logger "Begin cpp build" +PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \ RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libcugraph rapids-upload-conda-to-s3 cpp diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 1e8491d67be..83c940ffc05 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -56,6 +56,7 @@ fi cd "${package_dir}" +PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \ CMAKE_BUILD_PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \ python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check From 80a091cd7fbc3e87ff741ac6725e2ffe5462e74d Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 24 May 2024 18:35:46 +0000 Subject: [PATCH 27/31] override parallel_level to n_cpus - 1 --- ci/build_cpp.sh | 2 +- ci/build_wheel.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index 33e13392732..73783896496 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -17,7 +17,7 @@ version=$(rapids-generate-version) rapids-logger "Begin cpp build" -PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \ +PARALLEL_LEVEL="$(nproc --all --ignore=1)" \ RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libcugraph rapids-upload-conda-to-s3 cpp diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 83c940ffc05..d6bc138d28e 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -56,8 +56,8 @@ fi cd "${package_dir}" -PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \ -CMAKE_BUILD_PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \ +PARALLEL_LEVEL="$(nproc --all --ignore=1)" \ +CMAKE_BUILD_PARALLEL_LEVEL="$(nproc --all --ignore=1)" \ python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check # pure-python packages should be marked as pure, and not have auditwheel run on them. From a2c383eabca89e34b38e539ac1d8df666111623e Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 24 May 2024 19:41:47 +0000 Subject: [PATCH 28/31] limit parallelism to 8 --- ci/build_cpp.sh | 2 +- ci/build_wheel.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index 73783896496..611eb7d08fb 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -17,7 +17,7 @@ version=$(rapids-generate-version) rapids-logger "Begin cpp build" -PARALLEL_LEVEL="$(nproc --all --ignore=1)" \ +PARALLEL_LEVEL="8" \ RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libcugraph rapids-upload-conda-to-s3 cpp diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index d6bc138d28e..2367a4a109f 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -56,8 +56,8 @@ fi cd "${package_dir}" -PARALLEL_LEVEL="$(nproc --all --ignore=1)" \ -CMAKE_BUILD_PARALLEL_LEVEL="$(nproc --all --ignore=1)" \ +PARALLEL_LEVEL="8" \ +CMAKE_BUILD_PARALLEL_LEVEL="8" \ python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check # pure-python packages should be marked as pure, and not have auditwheel run on them. From 394b07bb742a42b3ad8f1bc14838540b02d43ff3 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Tue, 28 May 2024 10:22:51 -0700 Subject: [PATCH 29/31] increase parallelism to 16 --- ci/build_cpp.sh | 2 +- ci/build_wheel.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index 611eb7d08fb..f511383363a 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -17,7 +17,7 @@ version=$(rapids-generate-version) rapids-logger "Begin cpp build" -PARALLEL_LEVEL="8" \ +PARALLEL_LEVEL="16" \ RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libcugraph rapids-upload-conda-to-s3 cpp diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 2367a4a109f..00210a55919 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -56,8 +56,8 @@ fi cd "${package_dir}" -PARALLEL_LEVEL="8" \ -CMAKE_BUILD_PARALLEL_LEVEL="8" \ +PARALLEL_LEVEL="16" \ +CMAKE_BUILD_PARALLEL_LEVEL="16" \ python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check # pure-python packages should be marked as pure, and not have auditwheel run on them. From 325e0f6b3612d65fa2b01fd2351fc29529d30b21 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Tue, 28 May 2024 13:53:09 -0700 Subject: [PATCH 30/31] revert changes to rapids_config.cmake --- ci/build_cpp.sh | 1 - ci/build_wheel.sh | 2 -- cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake | 6 +++--- rapids_config.cmake | 3 --- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index f511383363a..132231e4a64 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -17,7 +17,6 @@ version=$(rapids-generate-version) rapids-logger "Begin cpp build" -PARALLEL_LEVEL="16" \ RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libcugraph rapids-upload-conda-to-s3 cpp diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 00210a55919..587c5fb38e7 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -56,8 +56,6 @@ fi cd "${package_dir}" -PARALLEL_LEVEL="16" \ -CMAKE_BUILD_PARALLEL_LEVEL="16" \ python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check # pure-python packages should be marked as pure, and not have auditwheel run on them. diff --git a/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake b/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake index f8020296381..8d57bf570bb 100644 --- a/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake +++ b/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -42,6 +42,6 @@ set(CUGRAPH_ETL_BRANCH_VERSION_cudf "${CUGRAPH_ETL_VERSION_MAJOR}.${CUGRAPH_ETL_ # To use a different RAFT locally, set the CMake variable # RPM_cudf_SOURCE=/path/to/local/cudf find_and_configure_cudf(VERSION ${CUGRAPH_ETL_MIN_VERSION_cudf} - FORK trxcllnt - PINNED_TAG fix/cccl-2.5 + FORK rapidsai + PINNED_TAG branch-${CUGRAPH_ETL_BRANCH_VERSION_cudf} ) diff --git a/rapids_config.cmake b/rapids_config.cmake index 06784f6f8bd..50b1054b7b9 100644 --- a/rapids_config.cmake +++ b/rapids_config.cmake @@ -25,9 +25,6 @@ else() "Could not determine RAPIDS version. Contents of VERSION file:\n${_rapids_version_formatted}") endif() -set(rapids-cmake-repo trxcllnt/rapids-cmake) -set(rapids-cmake-branch fea/cccl-2.5) - if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/CUGRAPH_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake") file( DOWNLOAD From 06dee7ebae8b87e1198eabf1b7837bbcc1388a3d Mon Sep 17 00:00:00 2001 From: ptaylor Date: Tue, 28 May 2024 15:43:14 -0700 Subject: [PATCH 31/31] remove workaround kv_store ctor --- cpp/src/prims/kv_store.cuh | 24 ------------------- ...m_reduce_dst_key_aggregated_outgoing_e.cuh | 2 +- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh index e28c8c3068f..de233fd583b 100644 --- a/cpp/src/prims/kv_store.cuh +++ b/cpp/src/prims/kv_store.cuh @@ -946,30 +946,6 @@ class kv_store_t { { } - /* when use_binary_search = true */ - template - kv_store_t(rmm::device_uvector&& keys, - decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{}))&& values, - decltype(cugraph::invalid_idx::value) - invalid_value /* invalid_value is returned when match fails for the given key */, - bool key_sorted /* if set to true, assume that the input data is sorted and skip - sorting (which is necessary for binary-search) */ - , - rmm::cuda_stream_view stream, - std::enable_if_t::value, int32_t> = 0) - : store_( - std::move(keys), - std::move(values), - [=]() { - auto invalid_row = value_t{}; - thrust::get<0>(invalid_row) = invalid_value; - return invalid_row; - }(), - key_sorted, - stream) - { - } - /* when use binary_search = false, this requires that the capacity is large enough */ template std::enable_if_t insert(KeyIterator key_first, diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 006d7760666..7be30b0a5f0 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -754,7 +754,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( std::make_unique>( std::move(majors), std::move(edge_major_values), - invalid_vertex_id::value, + edge_src_value_t{}, true, handle.get_stream()); }