Update primitives to support DCSR (DCSC) segments (Part 1) (#1690)

Update graph primitives to support DCSR (DCSC) segments (except for the ones used by Louvain, graph primitives used in Louvain will be updated in a separate PR with thread-divergence optimization & more testing). DCSR (DCSC) segment support is still disabled (as enabling this will break Louvain). Authors: - Seunghwa Kang (https://github.com/seunghwak) - Mark Harris (https://github.com/harrism) - Chuck Hastings (https://github.com/ChuckHastings) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: #1690
rapidsai · Jul 13, 2021 · 04f73b8 · 04f73b8
1 parent 1a63602
commit 04f73b8
Show file tree

Hide file tree

Showing 9 changed files with 607 additions and 192 deletions.
diff --git a/cpp/include/cugraph/experimental/detail/graph_utils.cuh b/cpp/include/cugraph/experimental/detail/graph_utils.cuh
@@ -43,7 +43,10 @@ template <typename vertex_t, typename edge_t>
 rmm::device_uvector<edge_t> compute_major_degrees(
   raft::handle_t const& handle,
   std::vector<edge_t const*> const& adj_matrix_partition_offsets,
-  partition_t<vertex_t> const& partition)
+  std::optional<std::vector<vertex_t const*>> const& adj_matrix_partition_dcs_nzd_vertices,
+  std::optional<std::vector<vertex_t>> const& adj_matrix_partition_dcs_nzd_vertex_counts,
+  partition_t<vertex_t> const& partition,
+  std::optional<std::vector<vertex_t>> const& adj_matrix_partition_segment_offsets)
 {
   auto& row_comm           = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name());
   auto const row_comm_rank = row_comm.get_rank();
@@ -52,6 +55,8 @@ rmm::device_uvector<edge_t> compute_major_degrees(
   auto const col_comm_rank = col_comm.get_rank();
   auto const col_comm_size = col_comm.get_size();
 
+  auto use_dcs = adj_matrix_partition_dcs_nzd_vertices.has_value();
+
   rmm::device_uvector<edge_t> local_degrees(0, handle.get_stream());
   rmm::device_uvector<edge_t> degrees(0, handle.get_stream());
 
@@ -69,11 +74,37 @@ rmm::device_uvector<edge_t> compute_major_degrees(
     vertex_t major_last{};
     std::tie(major_first, major_last) = partition.get_vertex_partition_range(vertex_partition_idx);
     auto p_offsets                    = adj_matrix_partition_offsets[i];
+    auto major_hypersparse_first =
+      use_dcs ? major_first + (*adj_matrix_partition_segment_offsets)
+                                [(detail::num_sparse_segments_per_vertex_partition + 2) * i +
+                                 detail::num_sparse_segments_per_vertex_partition]
+              : major_last;
     thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
                       thrust::make_counting_iterator(vertex_t{0}),
-                      thrust::make_counting_iterator(major_last - major_first),
-                      local_degrees.data(),
+                      thrust::make_counting_iterator(major_hypersparse_first - major_first),
+                      local_degrees.begin(),
                       [p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; });
+    if (use_dcs) {
+      auto p_dcs_nzd_vertices   = (*adj_matrix_partition_dcs_nzd_vertices)[i];
+      auto dcs_nzd_vertex_count = (*adj_matrix_partition_dcs_nzd_vertex_counts)[i];
+      thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+                   local_degrees.begin() + (major_hypersparse_first - major_first),
+                   local_degrees.begin() + (major_last - major_first),
+                   edge_t{0});
+      thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+                       thrust::make_counting_iterator(vertex_t{0}),
+                       thrust::make_counting_iterator(dcs_nzd_vertex_count),
+                       [p_offsets,
+                        p_dcs_nzd_vertices,
+                        major_first,
+                        major_hypersparse_first,
+                        local_degrees = local_degrees.data()] __device__(auto i) {
+                         auto d = p_offsets[(major_hypersparse_first - major_first) + i + 1] -
+                                  p_offsets[(major_hypersparse_first - major_first) + i];
+                         auto v                         = p_dcs_nzd_vertices[i];
+                         local_degrees[v - major_first] = d;
+                       });
+    }
     col_comm.reduce(local_degrees.data(),
                     i == col_comm_rank ? degrees.data() : static_cast<edge_t*>(nullptr),
                     static_cast<size_t>(major_last - major_first),
@@ -85,23 +116,6 @@ rmm::device_uvector<edge_t> compute_major_degrees(
   return degrees;
 }
 
-// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed =
-// false) or columns (of the graph adjacency matrix, if store_transposed = true)
-template <typename vertex_t, typename edge_t>
-rmm::device_uvector<edge_t> compute_major_degrees(
-  raft::handle_t const& handle,
-  std::vector<rmm::device_uvector<edge_t>> const& adj_matrix_partition_offsets,
-  partition_t<vertex_t> const& partition)
-{
-  // we can avoid creating this temporary with "if constexpr" supported from C++17
-  std::vector<edge_t const*> tmp_offsets(adj_matrix_partition_offsets.size(), nullptr);
-  std::transform(adj_matrix_partition_offsets.begin(),
-                 adj_matrix_partition_offsets.end(),
-                 tmp_offsets.begin(),
-                 [](auto const& offsets) { return offsets.data(); });
-  return compute_major_degrees(handle, tmp_offsets, partition);
-}
-
 // compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed =
 // false) or columns (of the graph adjacency matrix, if store_transposed = true)
 template <typename vertex_t, typename edge_t>
@@ -117,13 +131,6 @@ rmm::device_uvector<edge_t> compute_major_degrees(raft::handle_t const& handle,
   return degrees;
 }
 
-template <typename vertex_t, typename edge_t>
-struct degree_from_offsets_t {
-  edge_t const* offsets{nullptr};
-
-  __device__ edge_t operator()(vertex_t v) { return offsets[v + 1] - offsets[v]; }
-};
-
 template <typename vertex_t>
 struct compute_gpu_id_from_vertex_t {
   int comm_size{0};

diff --git a/cpp/include/cugraph/matrix_partition_device_view.cuh b/cpp/include/cugraph/matrix_partition_device_view.cuh
@@ -15,12 +15,16 @@
  */
 #pragma once
 
+#include <cugraph/experimental/graph.hpp>
 #include <cugraph/experimental/graph_view.hpp>
 #include <cugraph/utilities/error.hpp>
 
+#include <thrust/binary_search.h>
+#include <thrust/distance.h>
 #include <thrust/optional.h>
 #include <thrust/tuple.h>
 
+#include <cassert>
 #include <optional>
 #include <type_traits>
 
@@ -49,25 +53,28 @@ class matrix_partition_device_view_base_t {
   __host__ __device__ vertex_t const* get_indices() const { return indices_; }
   __host__ __device__ thrust::optional<weight_t const*> get_weights() const { return weights_; }
 
+  // major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC
   __device__ thrust::tuple<vertex_t const*, thrust::optional<weight_t const*>, edge_t>
-  get_local_edges(vertex_t major_offset) const noexcept
+  get_local_edges(vertex_t major_idx) const noexcept
   {
-    auto edge_offset  = *(offsets_ + major_offset);
-    auto local_degree = *(offsets_ + (major_offset + 1)) - edge_offset;
+    auto edge_offset  = *(offsets_ + major_idx);
+    auto local_degree = *(offsets_ + (major_idx + 1)) - edge_offset;
     auto indices      = indices_ + edge_offset;
     auto weights =
       weights_ ? thrust::optional<weight_t const*>{*weights_ + edge_offset} : thrust::nullopt;
     return thrust::make_tuple(indices, weights, local_degree);
   }
 
-  __device__ edge_t get_local_degree(vertex_t major_offset) const noexcept
+  // major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC
+  __device__ edge_t get_local_degree(vertex_t major_idx) const noexcept
   {
-    return *(offsets_ + (major_offset + 1)) - *(offsets_ + major_offset);
+    return *(offsets_ + (major_idx + 1)) - *(offsets_ + major_idx);
   }
 
-  __device__ edge_t get_local_offset(vertex_t major_offset) const noexcept
+  // major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC
+  __device__ edge_t get_local_offset(vertex_t major_idx) const noexcept
   {
-    return *(offsets_ + major_offset);
+    return *(offsets_ + major_idx);
   }
 
  private:
@@ -148,6 +155,34 @@ class matrix_partition_device_view_t<vertex_t,
     return major_first_ + major_offset;
   }
 
+  // major_hypersparse_idx: index within the hypersparse segment
+  __host__ __device__ thrust::optional<vertex_t> get_major_hypersparse_idx_from_major_nocheck(
+    vertex_t major) const noexcept
+  {
+    if (dcs_nzd_vertices_) {
+      // we can avoid binary search (and potentially improve performance) if we add an auxiliary
+      // array or cuco::static_map (at the expense of additional memory)
+      auto it = thrust::lower_bound(
+        thrust::seq, *dcs_nzd_vertices_, *dcs_nzd_vertices_ + *dcs_nzd_vertex_count_, major);
+      return it != *dcs_nzd_vertices_ + *dcs_nzd_vertex_count_
+               ? (*it == major ? thrust::optional<vertex_t>{static_cast<vertex_t>(
+                                   thrust::distance(*dcs_nzd_vertices_, it))}
+                               : thrust::nullopt)
+               : thrust::nullopt;
+    } else {
+      return thrust::nullopt;
+    }
+  }
+
+  // major_hypersparse_idx: index within the hypersparse segment
+  __host__ __device__ thrust::optional<vertex_t> get_major_from_major_hypersparse_idx_nocheck(
+    vertex_t major_hypersparse_idx) const noexcept
+  {
+    return dcs_nzd_vertices_
+             ? thrust::optional<vertex_t>{(*dcs_nzd_vertices_)[major_hypersparse_idx]}
+             : thrust::nullopt;
+  }
+
   __host__ __device__ vertex_t
   get_minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept
   {
@@ -159,6 +194,15 @@ class matrix_partition_device_view_t<vertex_t,
     return major_value_start_offset_;
   }
 
+  __host__ __device__ thrust::optional<vertex_t const*> get_dcs_nzd_vertices() const
+  {
+    return dcs_nzd_vertices_;
+  }
+  __host__ __device__ thrust::optional<vertex_t> get_dcs_nzd_vertex_count() const
+  {
+    return dcs_nzd_vertex_count_;
+  }
+
  private:
   // should be trivially copyable to device
 
@@ -220,12 +264,39 @@ class matrix_partition_device_view_t<vertex_t,
     return major_offset;
   }
 
+  // major_hypersparse_idx: index within the hypersparse segment
+  __host__ __device__ thrust::optional<vertex_t> get_major_hypersparse_idx_from_major_nocheck(
+    vertex_t major) const noexcept
+  {
+    assert(false);
+    return thrust::nullopt;
+  }
+
+  // major_hypersparse_idx: index within the hypersparse segment
+  __host__ __device__ thrust::optional<vertex_t> get_major_from_major_hypersparse_idx_nocheck(
+    vertex_t major_hypersparse_idx) const noexcept
+  {
+    assert(false);
+    return thrust::nullopt;
+  }
+
   __host__ __device__ vertex_t
   get_minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept
   {
     return minor_offset;
   }
 
+  __host__ __device__ thrust::optional<vertex_t const*> get_dcs_nzd_vertices() const
+  {
+    assert(false);
+    return thrust::nullopt;
+  }
+  __host__ __device__ thrust::optional<vertex_t> get_dcs_nzd_vertex_count() const
+  {
+    assert(false);
+    return thrust::nullopt;
+  }
+
  private:
   vertex_t number_of_vertices_;
 };

diff --git a/cpp/include/cugraph/matrix_partition_view.hpp b/cpp/include/cugraph/matrix_partition_view.hpp
@@ -123,6 +123,9 @@ class matrix_partition_view_t<vertex_t, edge_t, weight_t, multi_gpu, std::enable
   {
   }
 
+  std::optional<vertex_t const*> get_dcs_nzd_vertices() const { return std::nullopt; }
+  std::optional<vertex_t> get_dcs_nzd_vertex_count() const { return std::nullopt; }
+
   vertex_t get_major_first() const { return vertex_t{0}; }
   vertex_t get_major_last() const { return number_of_vertices_; }
   vertex_t get_minor_first() const { return vertex_t{0}; }