From 5d83412dc91708b1c4014c0bf76fc8bb723789d0 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Wed, 8 May 2024 21:46:41 +0000
Subject: [PATCH 01/31] add explicit lambda return types for CCCL 2.4+

---
 cpp/include/cugraph/utilities/mask_utils.cuh            | 2 +-
 cpp/src/community/detail/common_methods.cuh             | 2 +-
 cpp/src/community/legacy/louvain.cuh                    | 2 +-
 cpp/src/components/weakly_connected_components_impl.cuh | 4 ++--
 cpp/src/detail/utility_wrappers.cu                      | 2 +-
 cpp/src/structure/graph_view_impl.cuh                   | 7 ++++---
 cpp/tests/sampling/sampling_post_processing_test.cu     | 4 ++--
 7 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/cpp/include/cugraph/utilities/mask_utils.cuh b/cpp/include/cugraph/utilities/mask_utils.cuh
index 7b69ea3fe3a..8ff6b25aedc 100644
--- a/cpp/include/cugraph/utilities/mask_utils.cuh
+++ b/cpp/include/cugraph/utilities/mask_utils.cuh
@@ -160,7 +160,7 @@ size_t count_set_bits(raft::handle_t const& handle, MaskIterator mask_first, siz
     handle.get_thrust_policy(),
     thrust::make_counting_iterator(size_t{0}),
     thrust::make_counting_iterator(packed_bool_size(num_bits)),
-    [mask_first, num_bits] __device__(size_t i) {
+    [mask_first, num_bits] __device__(size_t i) -> size_t {
       auto word = *(mask_first + i);
       if ((i + 1) * packed_bools_per_word() > num_bits) {
         word &= packed_bool_partial_mask(num_bits % packed_bools_per_word());
diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh
index fe0a415db30..b8670496fed 100644
--- a/cpp/src/community/detail/common_methods.cuh
+++ b/cpp/src/community/detail/common_methods.cuh
@@ -178,7 +178,7 @@ weight_t compute_modularity(
     handle.get_thrust_policy(),
     cluster_weights.begin(),
     cluster_weights.end(),
-    [] __device__(weight_t p) { return p * p; },
+    [] __device__(weight_t p) -> weight_t { return p * p; },
     weight_t{0},
     thrust::plus<weight_t>());
 
diff --git a/cpp/src/community/legacy/louvain.cuh b/cpp/src/community/legacy/louvain.cuh
index 6cf5bbdc3c6..4c7ca3f1e2f 100644
--- a/cpp/src/community/legacy/louvain.cuh
+++ b/cpp/src/community/legacy/louvain.cuh
@@ -142,7 +142,7 @@ class Louvain {
       thrust::make_counting_iterator(0),
       thrust::make_counting_iterator(graph.number_of_vertices),
       [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__(
-        vertex_t community) {
+        vertex_t community) -> weight_t {
         return ((d_inc[community] / total_edge_weight) - resolution *
                                                            (d_deg[community] * d_deg[community]) /
                                                            (total_edge_weight * total_edge_weight));
diff --git a/cpp/src/components/weakly_connected_components_impl.cuh b/cpp/src/components/weakly_connected_components_impl.cuh
index d4d6d842951..6c950fb93ec 100644
--- a/cpp/src/components/weakly_connected_components_impl.cuh
+++ b/cpp/src/components/weakly_connected_components_impl.cuh
@@ -400,7 +400,7 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
         handle.get_thrust_policy(),
         new_root_candidates.begin(),
         new_root_candidates.begin() + (new_root_candidates.size() > 0 ? 1 : 0),
-        [vertex_partition, degrees = degrees.data()] __device__(auto v) {
+        [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t {
           return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
         },
         edge_t{0},
@@ -642,7 +642,7 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
         handle.get_thrust_policy(),
         thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).begin().get_iterator_tuple()),
         thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).end().get_iterator_tuple()),
-        [vertex_partition, degrees = degrees.data()] __device__(auto v) {
+        [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t {
           return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
         },
         edge_t{0},
diff --git a/cpp/src/detail/utility_wrappers.cu b/cpp/src/detail/utility_wrappers.cu
index 9100ecbd5e1..99a69fa00ee 100644
--- a/cpp/src/detail/utility_wrappers.cu
+++ b/cpp/src/detail/utility_wrappers.cu
@@ -139,7 +139,7 @@ vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view,
     rmm::exec_policy(stream_view),
     edge_first,
     edge_first + num_edges,
-    [] __device__(auto e) { return std::max(thrust::get<0>(e), thrust::get<1>(e)); },
+    [] __device__(auto e) -> vertex_t { return std::max(thrust::get<0>(e), thrust::get<1>(e)); },
     vertex_t{0},
     thrust::maximum<vertex_t>());
 }
diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh
index 29dca6ef409..543b3478137 100644
--- a/cpp/src/structure/graph_view_impl.cuh
+++ b/cpp/src/structure/graph_view_impl.cuh
@@ -353,7 +353,7 @@ edge_t count_edge_partition_multi_edges(
         execution_policy,
         thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[2],
         thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[3],
-        [edge_partition] __device__(auto major) {
+        [edge_partition] __device__(auto major) -> edge_t {
           auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
           vertex_t const* indices{nullptr};
           [[maybe_unused]] edge_t edge_offset{};
@@ -374,7 +374,8 @@ edge_t count_edge_partition_multi_edges(
         execution_policy,
         thrust::make_counting_iterator(vertex_t{0}),
         thrust::make_counting_iterator(*(edge_partition.dcs_nzd_vertex_count())),
-        [edge_partition, major_start_offset = (*segment_offsets)[3]] __device__(auto idx) {
+        [edge_partition,
+         major_start_offset = (*segment_offsets)[3]] __device__(auto idx) -> edge_t {
           auto major_idx =
             major_start_offset + idx;  // major_offset != major_idx in the hypersparse region
           vertex_t const* indices{nullptr};
@@ -398,7 +399,7 @@ edge_t count_edge_partition_multi_edges(
       thrust::make_counting_iterator(edge_partition.major_range_first()),
       thrust::make_counting_iterator(edge_partition.major_range_first()) +
         edge_partition.major_range_size(),
-      [edge_partition] __device__(auto major) {
+      [edge_partition] __device__(auto major) -> edge_t {
         auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
         vertex_t const* indices{nullptr};
         [[maybe_unused]] edge_t edge_offset{};
diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu
index c87cc5b960b..58b0629ec39 100644
--- a/cpp/tests/sampling/sampling_post_processing_test.cu
+++ b/cpp/tests/sampling/sampling_post_processing_test.cu
@@ -402,7 +402,7 @@ bool check_renumber_map_invariants(
          raft::device_span<vertex_t const>(sorted_org_vertices.data(), sorted_org_vertices.size()),
        matching_renumbered_vertices = raft::device_span<vertex_t const>(
          matching_renumbered_vertices.data(),
-         matching_renumbered_vertices.size())] __device__(vertex_t major) {
+         matching_renumbered_vertices.size())] __device__(vertex_t major) -> vertex_t {
         auto it = thrust::lower_bound(
           thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major);
         return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)];
@@ -418,7 +418,7 @@ bool check_renumber_map_invariants(
          raft::device_span<vertex_t const>(sorted_org_vertices.data(), sorted_org_vertices.size()),
        matching_renumbered_vertices = raft::device_span<vertex_t const>(
          matching_renumbered_vertices.data(),
-         matching_renumbered_vertices.size())] __device__(vertex_t minor) {
+         matching_renumbered_vertices.size())] __device__(vertex_t minor) -> vertex_t {
         auto it = thrust::lower_bound(
           thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor);
         return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)];

From 658b71e9913cd0d85fe3d9a16b8e5a15bd35eb29 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Wed, 8 May 2024 22:07:50 +0000
Subject: [PATCH 02/31] hide RAFT pragma deprecation messages

---
 cpp/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index eb6f348b380..42ee6e91dcf 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -82,14 +82,14 @@ set(CUGRAPH_CXX_FLAGS "")
 set(CUGRAPH_CUDA_FLAGS "")
 
 if(CMAKE_COMPILER_IS_GNUCXX)
-    list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations)
+    list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
 endif(CMAKE_COMPILER_IS_GNUCXX)
 
 
 message("-- Building for GPU_ARCHS = ${CMAKE_CUDA_ARCHITECTURES}")
 
 list(APPEND CUGRAPH_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)
-list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas=--disable-warnings)
+list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS -Xptxas=--disable-warnings)
 list(APPEND CUGRAPH_CUDA_FLAGS -Xcompiler=-Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable)
 list(APPEND CUGRAPH_CUDA_FLAGS -Xfatbin=-compress-all)
 

From c365372b968dd0a5ced94ce137e302c9bbe670eb Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Wed, 8 May 2024 22:22:33 +0000
Subject: [PATCH 03/31] use cuda::proclaim_return_type

---
 cpp/include/cugraph/utilities/mask_utils.cuh  |  5 ++-
 cpp/src/community/detail/common_methods.cuh   |  3 +-
 cpp/src/community/legacy/louvain.cuh          | 15 +++++---
 .../weakly_connected_components_impl.cuh      | 15 +++++---
 cpp/src/detail/utility_wrappers.cu            |  4 +-
 cpp/src/structure/graph_view_impl.cuh         | 37 ++++++++++---------
 .../sampling/sampling_post_processing_test.cu | 23 ++++++------
 7 files changed, 57 insertions(+), 45 deletions(-)

diff --git a/cpp/include/cugraph/utilities/mask_utils.cuh b/cpp/include/cugraph/utilities/mask_utils.cuh
index 8ff6b25aedc..1d86eef0ed1 100644
--- a/cpp/include/cugraph/utilities/mask_utils.cuh
+++ b/cpp/include/cugraph/utilities/mask_utils.cuh
@@ -20,6 +20,7 @@
 
 #include <raft/core/handle.hpp>
 
+#include <cuda/functional>
 #include <thrust/copy.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
@@ -160,13 +161,13 @@ size_t count_set_bits(raft::handle_t const& handle, MaskIterator mask_first, siz
     handle.get_thrust_policy(),
     thrust::make_counting_iterator(size_t{0}),
     thrust::make_counting_iterator(packed_bool_size(num_bits)),
-    [mask_first, num_bits] __device__(size_t i) -> size_t {
+    cuda::proclaim_return_type<size_t>([mask_first, num_bits] __device__(size_t i) -> size_t {
       auto word = *(mask_first + i);
       if ((i + 1) * packed_bools_per_word() > num_bits) {
         word &= packed_bool_partial_mask(num_bits % packed_bools_per_word());
       }
       return static_cast<size_t>(__popc(word));
-    },
+    }),
     size_t{0},
     thrust::plus<size_t>{});
 }
diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh
index b8670496fed..dcad4e92b95 100644
--- a/cpp/src/community/detail/common_methods.cuh
+++ b/cpp/src/community/detail/common_methods.cuh
@@ -29,6 +29,7 @@
 #include <cugraph/detail/utility_wrappers.hpp>
 #include <cugraph/graph_functions.hpp>
 
+#include <cuda/functional>
 #include <thrust/binary_search.h>
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>
@@ -178,7 +179,7 @@ weight_t compute_modularity(
     handle.get_thrust_policy(),
     cluster_weights.begin(),
     cluster_weights.end(),
-    [] __device__(weight_t p) -> weight_t { return p * p; },
+    cuda::proclaim_return_type<weight_t>([] __device__(weight_t p) -> weight_t { return p * p; }),
     weight_t{0},
     thrust::plus<weight_t>());
 
diff --git a/cpp/src/community/legacy/louvain.cuh b/cpp/src/community/legacy/louvain.cuh
index 4c7ca3f1e2f..53d0b231c03 100644
--- a/cpp/src/community/legacy/louvain.cuh
+++ b/cpp/src/community/legacy/louvain.cuh
@@ -22,6 +22,7 @@
 
 #include <cugraph/dendrogram.hpp>
 #include <cugraph/legacy/graph.hpp>
+
 #ifdef TIMING
 #include <cugraph/utilities/high_res_timer.hpp>
 #endif
@@ -29,6 +30,7 @@
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/functional>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
 #include <thrust/execution_policy.h>
@@ -141,12 +143,13 @@ class Louvain {
       handle_.get_thrust_policy(),
       thrust::make_counting_iterator(0),
       thrust::make_counting_iterator(graph.number_of_vertices),
-      [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__(
-        vertex_t community) -> weight_t {
-        return ((d_inc[community] / total_edge_weight) - resolution *
-                                                           (d_deg[community] * d_deg[community]) /
-                                                           (total_edge_weight * total_edge_weight));
-      },
+      cuda::proclaim_return_type<weight_t>(
+        [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__(
+          vertex_t community) -> weight_t {
+          return ((d_inc[community] / total_edge_weight) -
+                  resolution * (d_deg[community] * d_deg[community]) /
+                    (total_edge_weight * total_edge_weight));
+        }),
       weight_t{0.0},
       thrust::plus<weight_t>());
 
diff --git a/cpp/src/components/weakly_connected_components_impl.cuh b/cpp/src/components/weakly_connected_components_impl.cuh
index 6c950fb93ec..f63f28210d8 100644
--- a/cpp/src/components/weakly_connected_components_impl.cuh
+++ b/cpp/src/components/weakly_connected_components_impl.cuh
@@ -34,6 +34,7 @@
 
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/functional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
@@ -400,9 +401,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
         handle.get_thrust_policy(),
         new_root_candidates.begin(),
         new_root_candidates.begin() + (new_root_candidates.size() > 0 ? 1 : 0),
-        [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t {
-          return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
-        },
+        cuda::proclaim_return_type<edge_t>(
+          [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t {
+            return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
+          }),
         edge_t{0},
         thrust::plus<edge_t>{});
 
@@ -642,9 +644,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
         handle.get_thrust_policy(),
         thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).begin().get_iterator_tuple()),
         thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).end().get_iterator_tuple()),
-        [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t {
-          return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
-        },
+        cuda::proclaim_return_type<edge_t>(
+          [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t {
+            return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
+          }),
         edge_t{0},
         thrust::plus<edge_t>());
 
diff --git a/cpp/src/detail/utility_wrappers.cu b/cpp/src/detail/utility_wrappers.cu
index 99a69fa00ee..6d6158a16e7 100644
--- a/cpp/src/detail/utility_wrappers.cu
+++ b/cpp/src/detail/utility_wrappers.cu
@@ -21,6 +21,7 @@
 
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/functional>
 #include <thrust/count.h>
 #include <thrust/distance.h>
 #include <thrust/functional.h>
@@ -139,7 +140,8 @@ vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view,
     rmm::exec_policy(stream_view),
     edge_first,
     edge_first + num_edges,
-    [] __device__(auto e) -> vertex_t { return std::max(thrust::get<0>(e), thrust::get<1>(e)); },
+    cuda::proclaim_return_type<vertex_t>(
+      [] __device__(auto e) -> vertex_t { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }),
     vertex_t{0},
     thrust::maximum<vertex_t>());
 }
diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh
index 543b3478137..7097349dce5 100644
--- a/cpp/src/structure/graph_view_impl.cuh
+++ b/cpp/src/structure/graph_view_impl.cuh
@@ -353,7 +353,7 @@ edge_t count_edge_partition_multi_edges(
         execution_policy,
         thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[2],
         thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[3],
-        [edge_partition] __device__(auto major) -> edge_t {
+        cuda::proclaim_return_type<edge_t>([edge_partition] __device__(auto major) -> edge_t {
           auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
           vertex_t const* indices{nullptr};
           [[maybe_unused]] edge_t edge_offset{};
@@ -365,7 +365,7 @@ edge_t count_edge_partition_multi_edges(
             if (indices[i - 1] == indices[i]) { ++count; }
           }
           return count;
-        },
+        }),
         edge_t{0},
         thrust::plus<edge_t>{});
     }
@@ -374,20 +374,21 @@ edge_t count_edge_partition_multi_edges(
         execution_policy,
         thrust::make_counting_iterator(vertex_t{0}),
         thrust::make_counting_iterator(*(edge_partition.dcs_nzd_vertex_count())),
-        [edge_partition,
-         major_start_offset = (*segment_offsets)[3]] __device__(auto idx) -> edge_t {
-          auto major_idx =
-            major_start_offset + idx;  // major_offset != major_idx in the hypersparse region
-          vertex_t const* indices{nullptr};
-          [[maybe_unused]] edge_t edge_offset{};
-          edge_t local_degree{};
-          thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx);
-          edge_t count{0};
-          for (edge_t i = 1; i < local_degree; ++i) {  // assumes neighbors are sorted
-            if (indices[i - 1] == indices[i]) { ++count; }
-          }
-          return count;
-        },
+        cuda::proclaim_return_type<edge_t>(
+          [edge_partition,
+           major_start_offset = (*segment_offsets)[3]] __device__(auto idx) -> edge_t {
+            auto major_idx =
+              major_start_offset + idx;  // major_offset != major_idx in the hypersparse region
+            vertex_t const* indices{nullptr};
+            [[maybe_unused]] edge_t edge_offset{};
+            edge_t local_degree{};
+            thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx);
+            edge_t count{0};
+            for (edge_t i = 1; i < local_degree; ++i) {  // assumes neighbors are sorted
+              if (indices[i - 1] == indices[i]) { ++count; }
+            }
+            return count;
+          }),
         edge_t{0},
         thrust::plus<edge_t>{});
     }
@@ -399,7 +400,7 @@ edge_t count_edge_partition_multi_edges(
       thrust::make_counting_iterator(edge_partition.major_range_first()),
       thrust::make_counting_iterator(edge_partition.major_range_first()) +
         edge_partition.major_range_size(),
-      [edge_partition] __device__(auto major) -> edge_t {
+      cuda::proclaim_return_type<edge_t>([edge_partition] __device__(auto major) -> edge_t {
         auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
         vertex_t const* indices{nullptr};
         [[maybe_unused]] edge_t edge_offset{};
@@ -410,7 +411,7 @@ edge_t count_edge_partition_multi_edges(
           if (indices[i - 1] == indices[i]) { ++count; }
         }
         return count;
-      },
+      }),
       edge_t{0},
       thrust::plus<edge_t>{});
   }
diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu
index 58b0629ec39..5e21825901a 100644
--- a/cpp/tests/sampling/sampling_post_processing_test.cu
+++ b/cpp/tests/sampling/sampling_post_processing_test.cu
@@ -398,15 +398,16 @@ bool check_renumber_map_invariants(
       handle.get_thrust_policy(),
       unique_majors.begin(),
       unique_majors.end(),
-      [sorted_org_vertices =
-         raft::device_span<vertex_t const>(sorted_org_vertices.data(), sorted_org_vertices.size()),
-       matching_renumbered_vertices = raft::device_span<vertex_t const>(
-         matching_renumbered_vertices.data(),
-         matching_renumbered_vertices.size())] __device__(vertex_t major) -> vertex_t {
-        auto it = thrust::lower_bound(
-          thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major);
-        return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)];
-      },
+      cuda::proclaim_return_type<vertex_t>(
+        [sorted_org_vertices = raft::device_span<vertex_t const>(sorted_org_vertices.data(),
+                                                                 sorted_org_vertices.size()),
+         matching_renumbered_vertices = raft::device_span<vertex_t const>(
+           matching_renumbered_vertices.data(),
+           matching_renumbered_vertices.size())] __device__(vertex_t major) -> vertex_t {
+          auto it = thrust::lower_bound(
+            thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major);
+          return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)];
+        }),
       std::numeric_limits<vertex_t>::lowest(),
       thrust::maximum<vertex_t>{});
 
@@ -414,7 +415,7 @@ bool check_renumber_map_invariants(
       handle.get_thrust_policy(),
       unique_minors.begin(),
       unique_minors.end(),
-      [sorted_org_vertices =
+      cuda::proclaim_return_type<vertex_t>([sorted_org_vertices =
          raft::device_span<vertex_t const>(sorted_org_vertices.data(), sorted_org_vertices.size()),
        matching_renumbered_vertices = raft::device_span<vertex_t const>(
          matching_renumbered_vertices.data(),
@@ -422,7 +423,7 @@ bool check_renumber_map_invariants(
         auto it = thrust::lower_bound(
           thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor);
         return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)];
-      },
+      }),
       std::numeric_limits<vertex_t>::max(),
       thrust::minimum<vertex_t>{});
 

From cbca1404b488cf33b796328a0add1f2d639c4430 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Wed, 8 May 2024 22:38:41 +0000
Subject: [PATCH 04/31] fix lint: clang-format the proclaim_return_type lambda

---
 .../sampling/sampling_post_processing_test.cu | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu
index 5e21825901a..3bca382a2eb 100644
--- a/cpp/tests/sampling/sampling_post_processing_test.cu
+++ b/cpp/tests/sampling/sampling_post_processing_test.cu
@@ -415,15 +415,16 @@ bool check_renumber_map_invariants(
       handle.get_thrust_policy(),
       unique_minors.begin(),
       unique_minors.end(),
-      cuda::proclaim_return_type<vertex_t>([sorted_org_vertices =
-         raft::device_span<vertex_t const>(sorted_org_vertices.data(), sorted_org_vertices.size()),
-       matching_renumbered_vertices = raft::device_span<vertex_t const>(
-         matching_renumbered_vertices.data(),
-         matching_renumbered_vertices.size())] __device__(vertex_t minor) -> vertex_t {
-        auto it = thrust::lower_bound(
-          thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor);
-        return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)];
-      }),
+      cuda::proclaim_return_type<vertex_t>(
+        [sorted_org_vertices = raft::device_span<vertex_t const>(sorted_org_vertices.data(),
+                                                                 sorted_org_vertices.size()),
+         matching_renumbered_vertices = raft::device_span<vertex_t const>(
+           matching_renumbered_vertices.data(),
+           matching_renumbered_vertices.size())] __device__(vertex_t minor) -> vertex_t {
+          auto it = thrust::lower_bound(
+            thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor);
+          return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)];
+        }),
       std::numeric_limits<vertex_t>::max(),
       thrust::minimum<vertex_t>{});
 

From 423343998b0c9f7735d1eed0513dcdebbdcc8625 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Wed, 8 May 2024 23:13:18 +0000
Subject: [PATCH 05/31] add kv_store_t overload for thrust::tuple construction
 changes

---
 .../cugraph/utilities/device_functors.cuh     |  7 +++---
 cpp/src/prims/kv_store.cuh                    | 25 +++++++++++++++++++
 cpp/tests/CMakeLists.txt                      |  2 +-
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh
index 3af8ed1dd19..7c2480dcd74 100644
--- a/cpp/include/cugraph/utilities/device_functors.cuh
+++ b/cpp/include/cugraph/utilities/device_functors.cuh
@@ -78,13 +78,14 @@ struct indirection_t {
 
 template <typename index_t, typename Iterator>
 struct indirection_if_idx_valid_t {
+  using value_type = typename thrust::iterator_traits<Iterator>::value_type;
   Iterator first{};
   index_t invalid_idx{};
-  typename thrust::iterator_traits<Iterator>::value_type invalid_value{};
+  value_type invalid_value{};
 
-  __device__ typename thrust::iterator_traits<Iterator>::value_type operator()(index_t i) const
+  __device__ value_type operator()(index_t i) const
   {
-    return (i != invalid_idx) ? *(first + i) : invalid_value;
+    return (i != invalid_idx) ? static_cast<value_type>(*(first + i)) : invalid_value;
   }
 };
 
diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh
index 2cc7856d87a..088d3efa51b 100644
--- a/cpp/src/prims/kv_store.cuh
+++ b/cpp/src/prims/kv_store.cuh
@@ -17,6 +17,7 @@
 
 #include "prims/detail/optional_dataframe_buffer.hpp"
 
+#include <cugraph/graph.hpp>
 #include <cugraph/utilities/dataframe_buffer.hpp>
 #include <cugraph/utilities/device_functors.cuh>
 
@@ -944,6 +945,30 @@ class kv_store_t {
   {
   }
 
+  /* when use_binary_search = true */
+  template <bool binary_search = use_binary_search>
+  kv_store_t(rmm::device_uvector<key_t>&& keys,
+             decltype(allocate_dataframe_buffer<value_t>(0, rmm::cuda_stream_view{}))&& values,
+             decltype(cugraph::invalid_idx<key_t>::value)
+               invalid_value /* invalid_value is returned when match fails for the given key */,
+             bool key_sorted /* if set to true, assume that the input data is sorted and skip
+                                sorting (which is necessary for binary-search) */
+             ,
+             rmm::cuda_stream_view stream,
+             std::enable_if_t<binary_search && is_thrust_tuple<value_t>::value, int32_t> = 0)
+    : store_(
+        std::move(keys),
+        std::move(values),
+        [=]() {
+          auto invalid_row               = value_t{};
+          cuda::std::get<0>(invalid_row) = invalid_value;
+          return invalid_row;
+        }(),
+        key_sorted,
+        stream)
+  {
+  }
+
   /* when use binary_search = false, this requires that the capacity is large enough */
   template <typename KeyIterator, typename ValueIterator, bool binary_search = use_binary_search>
   std::enable_if_t<!binary_search, void> insert(KeyIterator key_first,
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 2dcda796f9c..35e7b144f6a 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -661,7 +661,7 @@ if(BUILD_CUGRAPH_MG_TESTS)
 
     ###############################################################################################
     # - MG PRIMS EXTRACT_TRANSFORM_E tests --------------------------------------------------------
-    ConfigureTestMG(MG_EXTRACT_TRANSFORM_E_TEST prims/mg_extract_transform_e.cu)
+    # ConfigureTestMG(MG_EXTRACT_TRANSFORM_E_TEST prims/mg_extract_transform_e.cu)
 
     ###############################################################################################
     # - MG PRIMS EXTRACT_TRANSFORM_V_FRONTIER_OUTGOING_E tests ------------------------------------

From d8a5733202333b84be2b69dd4caf084af6b20c1e Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Thu, 9 May 2024 00:25:35 +0000
Subject: [PATCH 06/31] fix lint: update copyright year

---
 cpp/include/cugraph/utilities/device_functors.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh
index 7c2480dcd74..20cf98f7e6d 100644
--- a/cpp/include/cugraph/utilities/device_functors.cuh
+++ b/cpp/include/cugraph/utilities/device_functors.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.

From 87a1d3d473af372766d1b606ba0be7bf2562085f Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Thu, 9 May 2024 03:26:01 +0000
Subject: [PATCH 07/31] use thrust::get

---
 cpp/src/prims/kv_store.cuh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh
index 088d3efa51b..93b990bb8a1 100644
--- a/cpp/src/prims/kv_store.cuh
+++ b/cpp/src/prims/kv_store.cuh
@@ -960,8 +960,8 @@ class kv_store_t {
         std::move(keys),
         std::move(values),
         [=]() {
-          auto invalid_row               = value_t{};
-          cuda::std::get<0>(invalid_row) = invalid_value;
+          auto invalid_row            = value_t{};
+          thrust::get<0>(invalid_row) = invalid_value;
           return invalid_row;
         }(),
         key_sorted,

From 3aaedf4a4fcbbb4c22018483f78c7d8859879f80 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Thu, 9 May 2024 13:30:44 -0700
Subject: [PATCH 08/31] define cxx and cuda standards

---
 cpp/tests/CMakeLists.txt | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 35e7b144f6a..cc4a16b23b7 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -169,7 +169,11 @@ function(ConfigureTest CMAKE_TEST_NAME)
     )
     set_target_properties(
         ${CMAKE_TEST_NAME}
-            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib")
+            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib"
+                       CXX_STANDARD                        17
+                       CXX_STANDARD_REQUIRED               ON
+                       CUDA_STANDARD                       17
+                       CUDA_STANDARD_REQUIRED              ON)
 
     rapids_test_add(
         NAME ${CMAKE_TEST_NAME}
@@ -195,7 +199,11 @@ function(ConfigureTestMG CMAKE_TEST_NAME)
     )
     set_target_properties(
         ${CMAKE_TEST_NAME}
-            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib")
+            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib"
+                       CXX_STANDARD                        17
+                       CXX_STANDARD_REQUIRED               ON
+                       CUDA_STANDARD                       17
+                       CUDA_STANDARD_REQUIRED              ON)
 
     rapids_test_add(
         NAME ${CMAKE_TEST_NAME}
@@ -241,7 +249,11 @@ function(ConfigureCTest CMAKE_TEST_NAME)
     )
     set_target_properties(
         ${CMAKE_TEST_NAME}
-            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib")
+            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib"
+                       CXX_STANDARD                        17
+                       CXX_STANDARD_REQUIRED               ON
+                       CUDA_STANDARD                       17
+                       CUDA_STANDARD_REQUIRED              ON)
 
     rapids_test_add(
         NAME ${CMAKE_TEST_NAME}
@@ -269,7 +281,11 @@ function(ConfigureCTestMG CMAKE_TEST_NAME)
     )
     set_target_properties(
         ${CMAKE_TEST_NAME}
-            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib")
+            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib"
+                       CXX_STANDARD                        17
+                       CXX_STANDARD_REQUIRED               ON
+                       CUDA_STANDARD                       17
+                       CUDA_STANDARD_REQUIRED              ON)
 
     rapids_test_add(
         NAME ${CMAKE_TEST_NAME}

From f5b1e77b4b960336037bb0d982ccb683755ce052 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Thu, 9 May 2024 20:30:51 +0000
Subject: [PATCH 09/31] update devcontainer workflow to use
 NVIDIA/cccl#pull-request/1667

---
 .github/workflows/pr.yaml | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index c04e0e879d2..742a407514d 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -195,6 +195,21 @@ jobs:
       node_type: cpu32
       extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
       build_command: |
+        # Tell rapids-cmake to use custom CCCL and cuCollections forks
+        rapids_branch="$(yq '.x-git-defaults.tag' /opt/rapids-build-utils/manifest.yaml)";
+        rapids_version="${rapids_branch#branch-}";
+        curl -fsSL -o- https://raw.githubusercontent.com/trxcllnt/rapids-cmake/branch-24.04-cccl-2.4.0/rapids-cmake/cpm/patches/cccl/revert_pr_211.diff \
+          | tee ~/rapids-cmake-revert_pr_211.diff;
+        curl -fsSL -o- "https://raw.githubusercontent.com/rapidsai/rapids-cmake/${rapids_branch}/rapids-cmake/cpm/versions.json" \
+          | jq -r ".packages.CCCL *= {\"version\": \"2.5.0\", \"git_tag\": \"pull-request/1667\"}" \
+          | jq -r "(.packages.CCCL.patches[] | select(.file == \"cccl/revert_pr_211.diff\")).file = \"${HOME}/rapids-cmake-revert_pr_211.diff\"" \
+          | jq -r ".packages.cuco *= {\"git_url\": \"https://github.com/trxcllnt/cuCollections.git\", \"git_tag\": \"rapids-${rapids_version}-cccl-2.5.0\", \"always_download\": true}" \
+          | tee ~/rapids-cmake-override-versions.json;
         sccache -z;
-        build-all --verbose -j$(nproc --ignore=1);
+        build-all \
+          -j$(nproc --ignore=1) -v \
+          -DBUILD_CUGRAPH_MG_TESTS=ON \
+          -DCMAKE_CXX_FLAGS="-ftemplate-backtrace-limit=0" \
+          -DCMAKE_CUDA_FLAGS="-ftemplate-backtrace-limit=0" \
+          -DRAPIDS_CMAKE_CPM_DEFAULT_VERSION_FILE="${HOME}/rapids-cmake-override-versions.json";
         sccache -s;

From 7ed06f3d5ae2f9c6a775c8c9e8498b2f531cffc3 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Thu, 9 May 2024 20:31:07 +0000
Subject: [PATCH 10/31] uncomment failing test

---
 cpp/tests/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index cc4a16b23b7..7e03e24477e 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -677,7 +677,7 @@ if(BUILD_CUGRAPH_MG_TESTS)
 
     ###############################################################################################
     # - MG PRIMS EXTRACT_TRANSFORM_E tests --------------------------------------------------------
-    # ConfigureTestMG(MG_EXTRACT_TRANSFORM_E_TEST prims/mg_extract_transform_e.cu)
+    ConfigureTestMG(MG_EXTRACT_TRANSFORM_E_TEST prims/mg_extract_transform_e.cu)
 
     ###############################################################################################
     # - MG PRIMS EXTRACT_TRANSFORM_V_FRONTIER_OUTGOING_E tests ------------------------------------

From 499aa5e9a3deb17b21c994366543f404ee659908 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Thu, 9 May 2024 21:04:53 +0000
Subject: [PATCH 11/31] add multi-gpu dependencies to pip devcontainer

---
 .devcontainer/Dockerfile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 3d0ac075be3..3ec0a500541 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -7,6 +7,11 @@ FROM ${BASE} as pip-base
 
 ENV DEFAULT_VIRTUAL_ENV=rapids
 
+RUN apt update -y \
+ && DEBIAN_FRONTEND=noninteractive apt install -y \
+    libblas-dev liblapack-dev libopenmpi-dev \
+ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*;
+
 FROM ${BASE} as conda-base
 
 ENV DEFAULT_CONDA_ENV=rapids

From b613e435e80a0472e1752a13cea5c43b1a1ecb89 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Mon, 13 May 2024 13:08:28 -0700
Subject: [PATCH 12/31] extract_transform_e's e_op can't take a tagged key, fix
 the MG tests

---
 cpp/tests/prims/mg_extract_transform_e.cu | 109 +++++++---------------
 1 file changed, 32 insertions(+), 77 deletions(-)

diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu
index 48b893f6fea..1a9a8660078 100644
--- a/cpp/tests/prims/mg_extract_transform_e.cu
+++ b/cpp/tests/prims/mg_extract_transform_e.cu
@@ -59,55 +59,27 @@
 #include <sstream>
 #include <type_traits>
 
-template <typename key_t, typename vertex_t, typename property_t, typename output_payload_t>
+template <typename vertex_t, typename property_t, typename output_payload_t>
 struct e_op_t {
-  static_assert(std::is_same_v<key_t, vertex_t> ||
-                std::is_same_v<key_t, thrust::tuple<vertex_t, int32_t>>);
   static_assert(std::is_same_v<output_payload_t, int32_t> ||
                 std::is_same_v<output_payload_t, thrust::tuple<float, int32_t>>);
 
-  using return_type = thrust::optional<typename std::conditional_t<
-    std::is_same_v<key_t, vertex_t>,
-    std::conditional_t<std::is_arithmetic_v<output_payload_t>,
-                       thrust::tuple<vertex_t, vertex_t, int32_t>,
-                       thrust::tuple<vertex_t, vertex_t, float, int32_t>>,
-    std::conditional_t<std::is_arithmetic_v<output_payload_t>,
-                       thrust::tuple<vertex_t, int32_t, vertex_t, int32_t>,
-                       thrust::tuple<vertex_t, int32_t, vertex_t, float, int32_t>>>>;
-
-  __device__ return_type operator()(key_t optionally_tagged_src,
-                                    vertex_t dst,
-                                    property_t src_val,
-                                    property_t dst_val,
-                                    thrust::nullopt_t) const
+  using return_type =
+    thrust::optional<std::conditional_t<std::is_arithmetic_v<output_payload_t>,
+                                        thrust::tuple<vertex_t, vertex_t, int32_t>,
+                                        thrust::tuple<vertex_t, vertex_t, float, int32_t>>>;
+
+  __device__ return_type operator()(
+    vertex_t src, vertex_t dst, property_t src_val, property_t dst_val, thrust::nullopt_t) const
   {
     auto output_payload = static_cast<output_payload_t>(1);
     if (src_val < dst_val) {
-      if constexpr (std::is_same_v<key_t, vertex_t>) {
-        if constexpr (std::is_arithmetic_v<output_payload_t>) {
-          return thrust::make_tuple(optionally_tagged_src, dst, output_payload);
-        } else {
-          static_assert(thrust::tuple_size<output_payload_t>::value == size_t{2});
-          return thrust::make_tuple(optionally_tagged_src,
-                                    dst,
-                                    thrust::get<0>(output_payload),
-                                    thrust::get<1>(output_payload));
-        }
+      if constexpr (std::is_arithmetic_v<output_payload_t>) {
+        return thrust::make_tuple(src, dst, output_payload);
       } else {
-        static_assert(thrust::tuple_size<key_t>::value == size_t{2});
-        if constexpr (std::is_arithmetic_v<output_payload_t>) {
-          return thrust::make_tuple(thrust::get<0>(optionally_tagged_src),
-                                    thrust::get<1>(optionally_tagged_src),
-                                    dst,
-                                    output_payload);
-        } else {
-          static_assert(thrust::tuple_size<output_payload_t>::value == size_t{2});
-          return thrust::make_tuple(thrust::get<0>(optionally_tagged_src),
-                                    thrust::get<1>(optionally_tagged_src),
-                                    dst,
-                                    thrust::get<0>(output_payload),
-                                    thrust::get<1>(output_payload));
-        }
+        static_assert(thrust::tuple_size<output_payload_t>::value == size_t{2});
+        return thrust::make_tuple(
+          src, dst, thrust::get<0>(output_payload), thrust::get<1>(output_payload));
       }
     } else {
       return thrust::nullopt;
@@ -134,19 +106,11 @@ class Tests_MGExtractTransformE
   virtual void TearDown() {}
 
   // Compare the results of extract_transform_e primitive
-  template <typename vertex_t,
-            typename edge_t,
-            typename weight_t,
-            typename tag_t,
-            typename output_payload_t>
+  template <typename vertex_t, typename edge_t, typename weight_t, typename output_payload_t>
   void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase)
   {
     using result_t = int32_t;
 
-    using key_t =
-      std::conditional_t<std::is_same_v<tag_t, void>, vertex_t, thrust::tuple<vertex_t, tag_t>>;
-
-    static_assert(std::is_same_v<tag_t, void> || std::is_arithmetic_v<tag_t>);
     static_assert(std::is_same_v<output_payload_t, void> ||
                   cugraph::is_arithmetic_or_thrust_tuple_of_arithmetic<output_payload_t>::value);
     if constexpr (cugraph::is_thrust_tuple<output_payload_t>::value) {
@@ -212,7 +176,7 @@ class Tests_MGExtractTransformE
                                    mg_src_prop.view(),
                                    mg_dst_prop.view(),
                                    cugraph::edge_dummy_property_t{}.view(),
-                                   e_op_t<key_t, vertex_t, result_t, output_payload_t>{});
+                                   e_op_t<vertex_t, result_t, output_payload_t>{});
 
     if (cugraph::test::g_perf) {
       RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
@@ -225,7 +189,7 @@ class Tests_MGExtractTransformE
 
     if (prims_usecase.check_correctness) {
       auto mg_aggregate_extract_transform_output_buffer = cugraph::allocate_dataframe_buffer<
-        typename e_op_t<key_t, vertex_t, result_t, output_payload_t>::return_type::value_type>(
+        typename e_op_t<vertex_t, result_t, output_payload_t>::return_type::value_type>(
         size_t{0}, handle_->get_stream());
       std::get<0>(mg_aggregate_extract_transform_output_buffer) =
         cugraph::test::device_gatherv(*handle_,
@@ -239,18 +203,12 @@ class Tests_MGExtractTransformE
         cugraph::test::device_gatherv(*handle_,
                                       std::get<2>(mg_extract_transform_output_buffer).data(),
                                       std::get<2>(mg_extract_transform_output_buffer).size());
-      if constexpr (!std::is_same_v<key_t, vertex_t> || !std::is_arithmetic_v<output_payload_t>) {
+      if constexpr (!std::is_arithmetic_v<output_payload_t>) {
         std::get<3>(mg_aggregate_extract_transform_output_buffer) =
           cugraph::test::device_gatherv(*handle_,
                                         std::get<3>(mg_extract_transform_output_buffer).data(),
                                         std::get<3>(mg_extract_transform_output_buffer).size());
       }
-      if constexpr (!std::is_same_v<key_t, vertex_t> && !std::is_arithmetic_v<output_payload_t>) {
-        std::get<4>(mg_aggregate_extract_transform_output_buffer) =
-          cugraph::test::device_gatherv(*handle_,
-                                        std::get<4>(mg_extract_transform_output_buffer).data(),
-                                        std::get<4>(mg_extract_transform_output_buffer).size());
-      }
 
       cugraph::graph_t<vertex_t, edge_t, store_transposed, false> sg_graph(*handle_);
       std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph(
@@ -290,7 +248,7 @@ class Tests_MGExtractTransformE
                                        sg_src_prop.view(),
                                        sg_dst_prop.view(),
                                        cugraph::edge_dummy_property_t{}.view(),
-                                       e_op_t<key_t, vertex_t, result_t, output_payload_t>{});
+                                       e_op_t<vertex_t, result_t, output_payload_t>{});
 
         thrust::sort(handle_->get_thrust_policy(),
                      cugraph::get_dataframe_buffer_begin(sg_extract_transform_output_buffer),
@@ -319,13 +277,13 @@ using Tests_MGExtractTransformE_Rmat = Tests_MGExtractTransformE<cugraph::test::
 TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatVoidInt32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, void, int32_t>(std::get<0>(param), std::get<1>(param));
+  run_current_test<int32_t, int32_t, float, int32_t>(std::get<0>(param), std::get<1>(param));
 }
 
 TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidInt32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, void, int32_t>(
+  run_current_test<int32_t, int32_t, float, int32_t>(
     std::get<0>(param),
     cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
 }
@@ -333,14 +291,14 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidInt32)
 TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatVoidTupleFloatInt32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, void, thrust::tuple<float, int32_t>>(
-    std::get<0>(param), std::get<1>(param));
+  run_current_test<int32_t, int32_t, float, thrust::tuple<float, int32_t>>(std::get<0>(param),
+                                                                           std::get<1>(param));
 }
 
 TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidTupleFloatInt32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, void, thrust::tuple<float, int32_t>>(
+  run_current_test<int32_t, int32_t, float, thrust::tuple<float, int32_t>>(
     std::get<0>(param),
     cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
 }
@@ -348,14 +306,13 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidTupleFloatInt32)
 TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatInt32Int32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, int32_t, int32_t>(std::get<0>(param),
-                                                              std::get<1>(param));
+  run_current_test<int32_t, int32_t, float, int32_t>(std::get<0>(param), std::get<1>(param));
 }
 
 TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32Int32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, int32_t, int32_t>(
+  run_current_test<int32_t, int32_t, float, int32_t>(
     std::get<0>(param),
     cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
 }
@@ -363,14 +320,14 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32Int32)
 TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatInt32TupleFloatInt32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, int32_t, thrust::tuple<float, int32_t>>(
-    std::get<0>(param), std::get<1>(param));
+  run_current_test<int32_t, int32_t, float, thrust::tuple<float, int32_t>>(std::get<0>(param),
+                                                                           std::get<1>(param));
 }
 
 TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32TupleFloatInt32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, int32_t, thrust::tuple<float, int32_t>>(
+  run_current_test<int32_t, int32_t, float, thrust::tuple<float, int32_t>>(
     std::get<0>(param),
     cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
 }
@@ -378,14 +335,13 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32TupleFloatInt32)
 TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int64FloatInt32Int32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int64_t, float, int32_t, int32_t>(std::get<0>(param),
-                                                              std::get<1>(param));
+  run_current_test<int32_t, int64_t, float, int32_t>(std::get<0>(param), std::get<1>(param));
 }
 
 TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int64FloatInt32Int32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int64_t, float, int32_t, int32_t>(
+  run_current_test<int32_t, int64_t, float, int32_t>(
     std::get<0>(param),
     cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
 }
@@ -393,14 +349,13 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int64FloatInt32Int32)
 TEST_P(Tests_MGExtractTransformE_File, CheckInt64Int64FloatInt32Int32)
 {
   auto param = GetParam();
-  run_current_test<int64_t, int64_t, float, int32_t, int32_t>(std::get<0>(param),
-                                                              std::get<1>(param));
+  run_current_test<int64_t, int64_t, float, int32_t>(std::get<0>(param), std::get<1>(param));
 }
 
 TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt64Int64FloatInt32Int32)
 {
   auto param = GetParam();
-  run_current_test<int64_t, int64_t, float, int32_t, int32_t>(
+  run_current_test<int64_t, int64_t, float, int32_t>(
     std::get<0>(param),
     cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
 }

From 23d5cf44f9e392d27ce19874dae8546a50b9b949 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Mon, 13 May 2024 21:21:01 +0000
Subject: [PATCH 13/31] don't wrap an exec policy in another exec policy

---
 cpp/src/traversal/bfs_impl.cuh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/src/traversal/bfs_impl.cuh b/cpp/src/traversal/bfs_impl.cuh
index 1f6f29d8683..f144599b777 100644
--- a/cpp/src/traversal/bfs_impl.cuh
+++ b/cpp/src/traversal/bfs_impl.cuh
@@ -149,11 +149,11 @@ void bfs(raft::handle_t const& handle,
   auto constexpr invalid_distance = std::numeric_limits<vertex_t>::max();
   auto constexpr invalid_vertex   = invalid_vertex_id<vertex_t>::value;
 
-  thrust::fill(rmm::exec_policy(handle.get_thrust_policy()),
+  thrust::fill(handle.get_thrust_policy(),
                distances,
                distances + push_graph_view.local_vertex_partition_range_size(),
                invalid_distance);
-  thrust::fill(rmm::exec_policy(handle.get_thrust_policy()),
+  thrust::fill(handle.get_thrust_policy(),
                predecessor_first,
                predecessor_first + push_graph_view.local_vertex_partition_range_size(),
                invalid_vertex);
@@ -161,7 +161,7 @@ void bfs(raft::handle_t const& handle,
     push_graph_view.local_vertex_partition_view());
   if (n_sources) {
     thrust::for_each(
-      rmm::exec_policy(handle.get_thrust_policy()),
+      handle.get_thrust_policy(),
       sources,
       sources + n_sources,
       [vertex_partition, distances, predecessor_first] __device__(auto v) {

From 5edef0cd2d81e648cb810add7e31a7a6bac24ca7 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 14 May 2024 21:53:41 +0000
Subject: [PATCH 14/31] test rapids-cmake with CCCL 2.5

---
 rapids_config.cmake | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/rapids_config.cmake b/rapids_config.cmake
index 50b1054b7b9..06784f6f8bd 100644
--- a/rapids_config.cmake
+++ b/rapids_config.cmake
@@ -25,6 +25,9 @@ else()
       "Could not determine RAPIDS version. Contents of VERSION file:\n${_rapids_version_formatted}")
 endif()
 
+set(rapids-cmake-repo trxcllnt/rapids-cmake)
+set(rapids-cmake-branch fea/cccl-2.5)
+
 if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/CUGRAPH_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake")
   file(
     DOWNLOAD

From c3e8547bdcce075aa271e637b3525dde8b280488 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 14 May 2024 21:53:46 +0000
Subject: [PATCH 15/31] revert changes to pr.yaml

---
 .github/workflows/pr.yaml | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 742a407514d..5733646a8b9 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -195,21 +195,6 @@ jobs:
       node_type: cpu32
       extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
       build_command: |
-        # Tell rapids-cmake to use custom CCCL and cuCollections forks
-        rapids_branch="$(yq '.x-git-defaults.tag' /opt/rapids-build-utils/manifest.yaml)";
-        rapids_version="${rapids_branch#branch-}";
-        curl -fsSL -o- https://raw.githubusercontent.com/trxcllnt/rapids-cmake/branch-24.04-cccl-2.4.0/rapids-cmake/cpm/patches/cccl/revert_pr_211.diff \
-          | tee ~/rapids-cmake-revert_pr_211.diff;
-        curl -fsSL -o- "https://raw.githubusercontent.com/rapidsai/rapids-cmake/${rapids_branch}/rapids-cmake/cpm/versions.json" \
-          | jq -r ".packages.CCCL *= {\"version\": \"2.5.0\", \"git_tag\": \"pull-request/1667\"}" \
-          | jq -r "(.packages.CCCL.patches[] | select(.file == \"cccl/revert_pr_211.diff\")).file = \"${HOME}/rapids-cmake-revert_pr_211.diff\"" \
-          | jq -r ".packages.cuco *= {\"git_url\": \"https://github.com/trxcllnt/cuCollections.git\", \"git_tag\": \"rapids-${rapids_version}-cccl-2.5.0\", \"always_download\": true}" \
-          | tee ~/rapids-cmake-override-versions.json;
         sccache -z;
-        build-all \
-          -j$(nproc --ignore=1) -v \
-          -DBUILD_CUGRAPH_MG_TESTS=ON \
-          -DCMAKE_CXX_FLAGS="-ftemplate-backtrace-limit=0" \
-          -DCMAKE_CUDA_FLAGS="-ftemplate-backtrace-limit=0" \
-          -DRAPIDS_CMAKE_CPM_DEFAULT_VERSION_FILE="${HOME}/rapids-cmake-override-versions.json";
+        build-all --verbose -j$(nproc --ignore=1) -DBUILD_CUGRAPH_MG_TESTS=ON;
         sccache -s;

From 1928e98a974def187bd8da9a74fe4475a2974f9a Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 21 May 2024 22:48:59 +0000
Subject: [PATCH 16/31] install ucx feature after cuda

---
 .devcontainer/cuda11.8-pip/devcontainer.json | 8 ++++----
 .devcontainer/cuda12.2-pip/devcontainer.json | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
index a4dc168505b..9b71d9bce92 100644
--- a/.devcontainer/cuda11.8-pip/devcontainer.json
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -15,9 +15,6 @@
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
-      "version": "1.15.0"
-    },
     "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": {
       "version": "11.8",
       "installcuBLAS": true,
@@ -25,11 +22,14 @@
       "installcuRAND": true,
       "installcuSPARSE": true
     },
+    "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
+      "version": "1.15.0"
+    },
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
   },
   "overrideFeatureInstallOrder": [
-    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/cuda",
+    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
   "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"],
diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json
index 393a5c63d23..577887c50c8 100644
--- a/.devcontainer/cuda12.2-pip/devcontainer.json
+++ b/.devcontainer/cuda12.2-pip/devcontainer.json
@@ -15,9 +15,6 @@
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
-      "version": "1.15.0"
-    },
     "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": {
       "version": "12.2",
       "installcuBLAS": true,
@@ -25,11 +22,14 @@
       "installcuRAND": true,
       "installcuSPARSE": true
     },
+    "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
+      "version": "1.15.0"
+    },
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
   },
   "overrideFeatureInstallOrder": [
-    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/cuda",
+    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
   "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.2-venvs}"],

From 603e839729dcc2fc723f3e958dda85f7a22323d0 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Thu, 23 May 2024 17:21:32 +0000
Subject: [PATCH 17/31] use devcontainers with ucx and openmpi prebuilt

---
 .devcontainer/Dockerfile                     | 2 +-
 .devcontainer/cuda11.8-pip/devcontainer.json | 6 +-----
 .devcontainer/cuda12.2-pip/devcontainer.json | 6 +-----
 3 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 3ec0a500541..190003dd7af 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -9,7 +9,7 @@ ENV DEFAULT_VIRTUAL_ENV=rapids
 
 RUN apt update -y \
  && DEBIAN_FRONTEND=noninteractive apt install -y \
-    libblas-dev liblapack-dev libopenmpi-dev \
+    libblas-dev liblapack-dev \
  && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*;
 
 FROM ${BASE} as conda-base
diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
index 9b71d9bce92..2c7b578c044 100644
--- a/.devcontainer/cuda11.8-pip/devcontainer.json
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -5,7 +5,7 @@
     "args": {
       "CUDA": "11.8",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ucx1.15.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
@@ -22,14 +22,10 @@
       "installcuRAND": true,
       "installcuSPARSE": true
     },
-    "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
-      "version": "1.15.0"
-    },
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
   },
   "overrideFeatureInstallOrder": [
     "ghcr.io/rapidsai/devcontainers/features/cuda",
-    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
   "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"],
diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json
index 577887c50c8..c7b0585ea61 100644
--- a/.devcontainer/cuda12.2-pip/devcontainer.json
+++ b/.devcontainer/cuda12.2-pip/devcontainer.json
@@ -5,7 +5,7 @@
     "args": {
       "CUDA": "12.2",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ucx1.15.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
@@ -22,14 +22,10 @@
       "installcuRAND": true,
       "installcuSPARSE": true
     },
-    "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
-      "version": "1.15.0"
-    },
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
   },
   "overrideFeatureInstallOrder": [
     "ghcr.io/rapidsai/devcontainers/features/cuda",
-    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
   "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.2-venvs}"],

From afbee0a366df4cda2ea8623a8915924d3ff6637c Mon Sep 17 00:00:00 2001
From: Ray Bell <rayjohnbell0@gmail.com>
Date: Tue, 21 May 2024 21:24:18 -0400
Subject: [PATCH 18/31] DOC: doc-update-link-for-cugraphops (#4279)

Fixes a broken link

https://github.com/rapidsai/cugraph-ops/blob/branch-23.04/README.md -> https://github.com/rapidsai/cugraph/blob/branch-24.06/readme_pages/cugraph_ops.md

Authors:
  - Ray Bell (https://github.com/raybellwaves)
  - Alex Barghi (https://github.com/alexbarghi-nv)
  - Rick Ratzel (https://github.com/rlratzel)

Approvers:
  - Don Acosta (https://github.com/acostadon)

URL: https://github.com/rapidsai/cugraph/pull/4279
---
 docs/cugraph/source/graph_support/cugraphops_support.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/cugraph/source/graph_support/cugraphops_support.rst b/docs/cugraph/source/graph_support/cugraphops_support.rst
index fd79564f849..96b13f62a9c 100644
--- a/docs/cugraph/source/graph_support/cugraphops_support.rst
+++ b/docs/cugraph/source/graph_support/cugraphops_support.rst
@@ -7,4 +7,4 @@ cugraph-ops aims to be a low-level, framework agnostic library providing commonl
 .. toctree::
    :maxdepth: 3
 
-   https://github.com/rapidsai/cugraph-ops/blob/branch-23.04/README.md
+   https://github.com/rapidsai/cugraph/blob/branch-24.06/readme_pages/cugraph_ops.md

From 47972744813e6b6fba3d40599e0ea2dce6c55741 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Thu, 23 May 2024 12:56:40 -0700
Subject: [PATCH 19/31] fix devcontainer name for codespaces

---
 .devcontainer/cuda11.8-conda/devcontainer.json | 2 +-
 .devcontainer/cuda11.8-pip/devcontainer.json   | 2 +-
 .devcontainer/cuda12.2-conda/devcontainer.json | 2 +-
 .devcontainer/cuda12.2-pip/devcontainer.json   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json
index 7c9cd0258a4..d878f2d6584 100644
--- a/.devcontainer/cuda11.8-conda/devcontainer.json
+++ b/.devcontainer/cuda11.8-conda/devcontainer.json
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
index 2c7b578c044..a0edcb27df8 100644
--- a/.devcontainer/cuda11.8-pip/devcontainer.json
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
diff --git a/.devcontainer/cuda12.2-conda/devcontainer.json b/.devcontainer/cuda12.2-conda/devcontainer.json
index eae4967f3b2..8a095d9b934 100644
--- a/.devcontainer/cuda12.2-conda/devcontainer.json
+++ b/.devcontainer/cuda12.2-conda/devcontainer.json
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json
index c7b0585ea61..10436f8b28d 100644
--- a/.devcontainer/cuda12.2-pip/devcontainer.json
+++ b/.devcontainer/cuda12.2-pip/devcontainer.json
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {

From 34f61381ba724c0944ee83a7b94bad025ef1b95c Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Thu, 23 May 2024 16:04:39 -0700
Subject: [PATCH 20/31] use trxcllnt/cudf#fix/cccl-2.5 branch when building
 libcudf from source

---
 cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake b/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake
index 8d57bf570bb..3bba81ce415 100644
--- a/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake
+++ b/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake
@@ -42,6 +42,6 @@ set(CUGRAPH_ETL_BRANCH_VERSION_cudf "${CUGRAPH_ETL_VERSION_MAJOR}.${CUGRAPH_ETL_
 # To use a different RAFT locally, set the CMake variable
 # RPM_cudf_SOURCE=/path/to/local/cudf
 find_and_configure_cudf(VERSION    ${CUGRAPH_ETL_MIN_VERSION_cudf}
-                        FORK       rapidsai
-                        PINNED_TAG branch-${CUGRAPH_ETL_BRANCH_VERSION_cudf}
+                        FORK       trxcllnt
+                        PINNED_TAG fix/cccl-2.5
                         )

From 47be146d6465fdfdd254ca0c2053381d69ee2141 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Thu, 23 May 2024 23:11:36 +0000
Subject: [PATCH 21/31] fix lint (update copyright year in get_cudf.cmake)

---
 cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake b/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake
index 3bba81ce415..f8020296381 100644
--- a/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake
+++ b/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From c8f66e40b4f83614bffdf99b74926615bd7938d8 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Fri, 24 May 2024 13:55:52 +0000
Subject: [PATCH 22/31] make similar changes as in
 https://github.com/rapidsai/cugraph/pull/4436 to resolve wheel test failures

---
 ci/test_wheel_cugraph-dgl.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh
index 827ad487115..046265f2bd2 100755
--- a/ci/test_wheel_cugraph-dgl.sh
+++ b/ci/test_wheel_cugraph-dgl.sh
@@ -33,7 +33,10 @@ PYTORCH_URL="https://download.pytorch.org/whl/cu${PYTORCH_CUDA_VER}"
 DGL_URL="https://data.dgl.ai/wheels/cu${PYTORCH_CUDA_VER}/repo.html"
 
 rapids-logger "Installing PyTorch and DGL"
-rapids-retry python -m pip install torch --index-url ${PYTORCH_URL}
+rapids-retry python -m pip install --no-cache-dir torch --index-url ${PYTORCH_URL}
 rapids-retry python -m pip install dgl==2.0.0 --find-links ${DGL_URL}
 
+python -m pip uninstall -y torch torchvision torchaudio
+python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118
+
 python -m pytest python/cugraph-dgl/tests

From 476a24c7bff3fd16d2cb2a092a6529bbf0650933 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Fri, 24 May 2024 14:08:00 +0000
Subject: [PATCH 23/31] add cuda ver to nightly torch index URL

---
 ci/test_wheel_cugraph-dgl.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh
index 046265f2bd2..6713b032a93 100755
--- a/ci/test_wheel_cugraph-dgl.sh
+++ b/ci/test_wheel_cugraph-dgl.sh
@@ -34,9 +34,8 @@ DGL_URL="https://data.dgl.ai/wheels/cu${PYTORCH_CUDA_VER}/repo.html"
 
 rapids-logger "Installing PyTorch and DGL"
 rapids-retry python -m pip install --no-cache-dir torch --index-url ${PYTORCH_URL}
-rapids-retry python -m pip install dgl==2.0.0 --find-links ${DGL_URL}
+rapids-retry python -m pip install --no-cache-dir dgl==2.0.0 --find-links ${DGL_URL}
 
-python -m pip uninstall -y torch torchvision torchaudio
-python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118
+python -m pip install -U --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu${PYTORCH_CUDA_VER}
 
 python -m pytest python/cugraph-dgl/tests

From 96633db82b9a70facbdc0120260e79f283d24a3b Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Fri, 24 May 2024 16:50:57 +0000
Subject: [PATCH 24/31] limit CI parallelism to n_cpus - 1

---
 .github/workflows/pr.yaml | 2 ++
 ci/build_wheel.sh         | 1 +
 2 files changed, 3 insertions(+)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 5733646a8b9..adbea3b799d 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -44,6 +44,8 @@ jobs:
     needs: checks
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.06
+    env:
+      PARALLEL_LEVEL: "31"
     with:
       build_type: pull-request
       node_type: cpu32
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 587c5fb38e7..1e8491d67be 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -56,6 +56,7 @@ fi
 
 cd "${package_dir}"
 
+CMAKE_BUILD_PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \
 python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
 
 # pure-python packages should be marked as pure, and not have auditwheel run on them.

From 4f5c543e272e339522cf638957720f26f202204e Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Fri, 24 May 2024 17:20:33 +0000
Subject: [PATCH 25/31] move env to top level

---
 .github/workflows/pr.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index adbea3b799d..1632431a8a8 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -9,6 +9,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 
+env:
+  PARALLEL_LEVEL: "31"
+
 jobs:
   pr-builder:
     needs:
@@ -44,8 +47,6 @@ jobs:
     needs: checks
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.06
-    env:
-      PARALLEL_LEVEL: "31"
     with:
       build_type: pull-request
       node_type: cpu32

From feb4b2ceae056cfbcbbbedd4b4b875aaab4081fb Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Fri, 24 May 2024 18:34:28 +0000
Subject: [PATCH 26/31] add PARALLEL_LEVEL to ci/build_{cpp,wheel}.sh

---
 .github/workflows/pr.yaml | 3 ---
 ci/build_cpp.sh           | 1 +
 ci/build_wheel.sh         | 1 +
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 1632431a8a8..5733646a8b9 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -9,9 +9,6 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 
-env:
-  PARALLEL_LEVEL: "31"
-
 jobs:
   pr-builder:
     needs:
diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index 132231e4a64..33e13392732 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -17,6 +17,7 @@ version=$(rapids-generate-version)
 
 rapids-logger "Begin cpp build"
 
+PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \
 RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libcugraph
 
 rapids-upload-conda-to-s3 cpp
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 1e8491d67be..83c940ffc05 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -56,6 +56,7 @@ fi
 
 cd "${package_dir}"
 
+PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \
 CMAKE_BUILD_PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \
 python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
 

From 80a091cd7fbc3e87ff741ac6725e2ffe5462e74d Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Fri, 24 May 2024 18:35:46 +0000
Subject: [PATCH 27/31] override parallel_level to n_cpus - 1

---
 ci/build_cpp.sh   | 2 +-
 ci/build_wheel.sh | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index 33e13392732..73783896496 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -17,7 +17,7 @@ version=$(rapids-generate-version)
 
 rapids-logger "Begin cpp build"
 
-PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \
+PARALLEL_LEVEL="$(nproc --all --ignore=1)" \
 RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libcugraph
 
 rapids-upload-conda-to-s3 cpp
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 83c940ffc05..d6bc138d28e 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -56,8 +56,8 @@ fi
 
 cd "${package_dir}"
 
-PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \
-CMAKE_BUILD_PARALLEL_LEVEL="${PARALLEL_LEVEL:-$(nproc --all --ignore=1)}" \
+PARALLEL_LEVEL="$(nproc --all --ignore=1)" \
+CMAKE_BUILD_PARALLEL_LEVEL="$(nproc --all --ignore=1)" \
 python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
 
 # pure-python packages should be marked as pure, and not have auditwheel run on them.

From a2c383eabca89e34b38e539ac1d8df666111623e Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Fri, 24 May 2024 19:41:47 +0000
Subject: [PATCH 28/31] limit parallelism to 8

---
 ci/build_cpp.sh   | 2 +-
 ci/build_wheel.sh | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index 73783896496..611eb7d08fb 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -17,7 +17,7 @@ version=$(rapids-generate-version)
 
 rapids-logger "Begin cpp build"
 
-PARALLEL_LEVEL="$(nproc --all --ignore=1)" \
+PARALLEL_LEVEL="8" \
 RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libcugraph
 
 rapids-upload-conda-to-s3 cpp
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index d6bc138d28e..2367a4a109f 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -56,8 +56,8 @@ fi
 
 cd "${package_dir}"
 
-PARALLEL_LEVEL="$(nproc --all --ignore=1)" \
-CMAKE_BUILD_PARALLEL_LEVEL="$(nproc --all --ignore=1)" \
+PARALLEL_LEVEL="8" \
+CMAKE_BUILD_PARALLEL_LEVEL="8" \
 python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
 
 # pure-python packages should be marked as pure, and not have auditwheel run on them.

From 394b07bb742a42b3ad8f1bc14838540b02d43ff3 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 28 May 2024 10:22:51 -0700
Subject: [PATCH 29/31] increase parallelism to 16

---
 ci/build_cpp.sh   | 2 +-
 ci/build_wheel.sh | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index 611eb7d08fb..f511383363a 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -17,7 +17,7 @@ version=$(rapids-generate-version)
 
 rapids-logger "Begin cpp build"
 
-PARALLEL_LEVEL="8" \
+PARALLEL_LEVEL="16" \
 RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libcugraph
 
 rapids-upload-conda-to-s3 cpp
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 2367a4a109f..00210a55919 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -56,8 +56,8 @@ fi
 
 cd "${package_dir}"
 
-PARALLEL_LEVEL="8" \
-CMAKE_BUILD_PARALLEL_LEVEL="8" \
+PARALLEL_LEVEL="16" \
+CMAKE_BUILD_PARALLEL_LEVEL="16" \
 python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
 
 # pure-python packages should be marked as pure, and not have auditwheel run on them.

From 325e0f6b3612d65fa2b01fd2351fc29529d30b21 Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 28 May 2024 13:53:09 -0700
Subject: [PATCH 30/31] revert changes to rapids_config.cmake,
 get_cudf.cmake, and CI build scripts

---
 ci/build_cpp.sh                                    | 1 -
 ci/build_wheel.sh                                  | 2 --
 cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake | 6 +++---
 rapids_config.cmake                                | 3 ---
 4 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index f511383363a..132231e4a64 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -17,7 +17,6 @@ version=$(rapids-generate-version)
 
 rapids-logger "Begin cpp build"
 
-PARALLEL_LEVEL="16" \
 RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libcugraph
 
 rapids-upload-conda-to-s3 cpp
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 00210a55919..587c5fb38e7 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -56,8 +56,6 @@ fi
 
 cd "${package_dir}"
 
-PARALLEL_LEVEL="16" \
-CMAKE_BUILD_PARALLEL_LEVEL="16" \
 python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
 
 # pure-python packages should be marked as pure, and not have auditwheel run on them.
diff --git a/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake b/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake
index f8020296381..8d57bf570bb 100644
--- a/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake
+++ b/cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -42,6 +42,6 @@ set(CUGRAPH_ETL_BRANCH_VERSION_cudf "${CUGRAPH_ETL_VERSION_MAJOR}.${CUGRAPH_ETL_
 # To use a different RAFT locally, set the CMake variable
 # RPM_cudf_SOURCE=/path/to/local/cudf
 find_and_configure_cudf(VERSION    ${CUGRAPH_ETL_MIN_VERSION_cudf}
-                        FORK       trxcllnt
-                        PINNED_TAG fix/cccl-2.5
+                        FORK       rapidsai
+                        PINNED_TAG branch-${CUGRAPH_ETL_BRANCH_VERSION_cudf}
                         )
diff --git a/rapids_config.cmake b/rapids_config.cmake
index 06784f6f8bd..50b1054b7b9 100644
--- a/rapids_config.cmake
+++ b/rapids_config.cmake
@@ -25,9 +25,6 @@ else()
       "Could not determine RAPIDS version. Contents of VERSION file:\n${_rapids_version_formatted}")
 endif()
 
-set(rapids-cmake-repo trxcllnt/rapids-cmake)
-set(rapids-cmake-branch fea/cccl-2.5)
-
 if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/CUGRAPH_RAPIDS-${RAPIDS_VERSION_MAJOR_MINOR}.cmake")
   file(
     DOWNLOAD

From 06dee7ebae8b87e1198eabf1b7837bbcc1388a3d Mon Sep 17 00:00:00 2001
From: ptaylor <paul.e.taylor@me.com>
Date: Tue, 28 May 2024 15:43:14 -0700
Subject: [PATCH 31/31] remove workaround kv_store ctor

---
 cpp/src/prims/kv_store.cuh                    | 24 -------------------
 ...m_reduce_dst_key_aggregated_outgoing_e.cuh |  2 +-
 2 files changed, 1 insertion(+), 25 deletions(-)

diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh
index e28c8c3068f..de233fd583b 100644
--- a/cpp/src/prims/kv_store.cuh
+++ b/cpp/src/prims/kv_store.cuh
@@ -946,30 +946,6 @@ class kv_store_t {
   {
   }
 
-  /* when use_binary_search = true */
-  template <bool binary_search = use_binary_search>
-  kv_store_t(rmm::device_uvector<key_t>&& keys,
-             decltype(allocate_dataframe_buffer<value_t>(0, rmm::cuda_stream_view{}))&& values,
-             decltype(cugraph::invalid_idx<key_t>::value)
-               invalid_value /* invalid_value is returned when match fails for the given key */,
-             bool key_sorted /* if set to true, assume that the input data is sorted and skip
-                                sorting (which is necessary for binary-search) */
-             ,
-             rmm::cuda_stream_view stream,
-             std::enable_if_t<binary_search && is_thrust_tuple<value_t>::value, int32_t> = 0)
-    : store_(
-        std::move(keys),
-        std::move(values),
-        [=]() {
-          auto invalid_row            = value_t{};
-          thrust::get<0>(invalid_row) = invalid_value;
-          return invalid_row;
-        }(),
-        key_sorted,
-        stream)
-  {
-  }
-
   /* when use binary_search = false, this requires that the capacity is large enough */
   template <typename KeyIterator, typename ValueIterator, bool binary_search = use_binary_search>
   std::enable_if_t<!binary_search, void> insert(KeyIterator key_first,
diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh
index 006d7760666..7be30b0a5f0 100644
--- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh
+++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh
@@ -754,7 +754,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
             std::make_unique<kv_store_t<vertex_t, edge_src_value_t, true>>(
               std::move(majors),
               std::move(edge_major_values),
-              invalid_vertex_id<vertex_t>::value,
+              edge_src_value_t{},
               true,
               handle.get_stream());
         }