Skip to content

Commit

Permalink
Update primitives to support DCSR (DCSC) segments (Part 1) (#1690)
Browse files Browse the repository at this point in the history
Update graph primitives to support DCSR (DCSC) segments (except for the ones used by Louvain, graph primitives used in Louvain will be updated in a separate PR with thread-divergence optimization & more testing).

DCSR (DCSC) segment support is still disabled (as enabling this will break Louvain).

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Mark Harris (https://github.com/harrism)
  - Chuck Hastings (https://github.com/ChuckHastings)
  - AJ Schmidt (https://github.com/ajschmidt8)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Brad Rees (https://github.com/BradReesWork)

URL: #1690
  • Loading branch information
seunghwak authored Jul 13, 2021
1 parent 1a63602 commit 04f73b8
Show file tree
Hide file tree
Showing 9 changed files with 607 additions and 192 deletions.
61 changes: 34 additions & 27 deletions cpp/include/cugraph/experimental/detail/graph_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ template <typename vertex_t, typename edge_t>
rmm::device_uvector<edge_t> compute_major_degrees(
raft::handle_t const& handle,
std::vector<edge_t const*> const& adj_matrix_partition_offsets,
partition_t<vertex_t> const& partition)
std::optional<std::vector<vertex_t const*>> const& adj_matrix_partition_dcs_nzd_vertices,
std::optional<std::vector<vertex_t>> const& adj_matrix_partition_dcs_nzd_vertex_counts,
partition_t<vertex_t> const& partition,
std::optional<std::vector<vertex_t>> const& adj_matrix_partition_segment_offsets)
{
auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name());
auto const row_comm_rank = row_comm.get_rank();
Expand All @@ -52,6 +55,8 @@ rmm::device_uvector<edge_t> compute_major_degrees(
auto const col_comm_rank = col_comm.get_rank();
auto const col_comm_size = col_comm.get_size();

auto use_dcs = adj_matrix_partition_dcs_nzd_vertices.has_value();

rmm::device_uvector<edge_t> local_degrees(0, handle.get_stream());
rmm::device_uvector<edge_t> degrees(0, handle.get_stream());

Expand All @@ -69,11 +74,37 @@ rmm::device_uvector<edge_t> compute_major_degrees(
vertex_t major_last{};
std::tie(major_first, major_last) = partition.get_vertex_partition_range(vertex_partition_idx);
auto p_offsets = adj_matrix_partition_offsets[i];
auto major_hypersparse_first =
use_dcs ? major_first + (*adj_matrix_partition_segment_offsets)
[(detail::num_sparse_segments_per_vertex_partition + 2) * i +
detail::num_sparse_segments_per_vertex_partition]
: major_last;
thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::make_counting_iterator(vertex_t{0}),
thrust::make_counting_iterator(major_last - major_first),
local_degrees.data(),
thrust::make_counting_iterator(major_hypersparse_first - major_first),
local_degrees.begin(),
[p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; });
if (use_dcs) {
auto p_dcs_nzd_vertices = (*adj_matrix_partition_dcs_nzd_vertices)[i];
auto dcs_nzd_vertex_count = (*adj_matrix_partition_dcs_nzd_vertex_counts)[i];
thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
local_degrees.begin() + (major_hypersparse_first - major_first),
local_degrees.begin() + (major_last - major_first),
edge_t{0});
thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::make_counting_iterator(vertex_t{0}),
thrust::make_counting_iterator(dcs_nzd_vertex_count),
[p_offsets,
p_dcs_nzd_vertices,
major_first,
major_hypersparse_first,
local_degrees = local_degrees.data()] __device__(auto i) {
auto d = p_offsets[(major_hypersparse_first - major_first) + i + 1] -
p_offsets[(major_hypersparse_first - major_first) + i];
auto v = p_dcs_nzd_vertices[i];
local_degrees[v - major_first] = d;
});
}
col_comm.reduce(local_degrees.data(),
i == col_comm_rank ? degrees.data() : static_cast<edge_t*>(nullptr),
static_cast<size_t>(major_last - major_first),
Expand All @@ -85,23 +116,6 @@ rmm::device_uvector<edge_t> compute_major_degrees(
return degrees;
}

// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed =
// false) or columns (of the graph adjacency matrix, if store_transposed = true)
template <typename vertex_t, typename edge_t>
rmm::device_uvector<edge_t> compute_major_degrees(
raft::handle_t const& handle,
std::vector<rmm::device_uvector<edge_t>> const& adj_matrix_partition_offsets,
partition_t<vertex_t> const& partition)
{
// we can avoid creating this temporary with "if constexpr" supported from C++17
std::vector<edge_t const*> tmp_offsets(adj_matrix_partition_offsets.size(), nullptr);
std::transform(adj_matrix_partition_offsets.begin(),
adj_matrix_partition_offsets.end(),
tmp_offsets.begin(),
[](auto const& offsets) { return offsets.data(); });
return compute_major_degrees(handle, tmp_offsets, partition);
}

// compute the numbers of nonzeros in rows (of the graph adjacency matrix, if store_transposed =
// false) or columns (of the graph adjacency matrix, if store_transposed = true)
template <typename vertex_t, typename edge_t>
Expand All @@ -117,13 +131,6 @@ rmm::device_uvector<edge_t> compute_major_degrees(raft::handle_t const& handle,
return degrees;
}

template <typename vertex_t, typename edge_t>
struct degree_from_offsets_t {
edge_t const* offsets{nullptr};

__device__ edge_t operator()(vertex_t v) { return offsets[v + 1] - offsets[v]; }
};

template <typename vertex_t>
struct compute_gpu_id_from_vertex_t {
int comm_size{0};
Expand Down
85 changes: 78 additions & 7 deletions cpp/include/cugraph/matrix_partition_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,16 @@
*/
#pragma once

#include <cugraph/experimental/graph.hpp>
#include <cugraph/experimental/graph_view.hpp>
#include <cugraph/utilities/error.hpp>

#include <thrust/binary_search.h>
#include <thrust/distance.h>
#include <thrust/optional.h>
#include <thrust/tuple.h>

#include <cassert>
#include <optional>
#include <type_traits>

Expand Down Expand Up @@ -49,25 +53,28 @@ class matrix_partition_device_view_base_t {
__host__ __device__ vertex_t const* get_indices() const { return indices_; }
__host__ __device__ thrust::optional<weight_t const*> get_weights() const { return weights_; }

// major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC
__device__ thrust::tuple<vertex_t const*, thrust::optional<weight_t const*>, edge_t>
get_local_edges(vertex_t major_offset) const noexcept
get_local_edges(vertex_t major_idx) const noexcept
{
auto edge_offset = *(offsets_ + major_offset);
auto local_degree = *(offsets_ + (major_offset + 1)) - edge_offset;
auto edge_offset = *(offsets_ + major_idx);
auto local_degree = *(offsets_ + (major_idx + 1)) - edge_offset;
auto indices = indices_ + edge_offset;
auto weights =
weights_ ? thrust::optional<weight_t const*>{*weights_ + edge_offset} : thrust::nullopt;
return thrust::make_tuple(indices, weights, local_degree);
}

__device__ edge_t get_local_degree(vertex_t major_offset) const noexcept
// major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC
__device__ edge_t get_local_degree(vertex_t major_idx) const noexcept
{
return *(offsets_ + (major_offset + 1)) - *(offsets_ + major_offset);
return *(offsets_ + (major_idx + 1)) - *(offsets_ + major_idx);
}

__device__ edge_t get_local_offset(vertex_t major_offset) const noexcept
// major_idx == major offset if CSR/CSC, major_offset != major_idx if DCSR/DCSC
__device__ edge_t get_local_offset(vertex_t major_idx) const noexcept
{
return *(offsets_ + major_offset);
return *(offsets_ + major_idx);
}

private:
Expand Down Expand Up @@ -148,6 +155,34 @@ class matrix_partition_device_view_t<vertex_t,
return major_first_ + major_offset;
}

// major_hypersparse_idx: index within the hypersparse segment
__host__ __device__ thrust::optional<vertex_t> get_major_hypersparse_idx_from_major_nocheck(
vertex_t major) const noexcept
{
if (dcs_nzd_vertices_) {
// we can avoid binary search (and potentially improve performance) if we add an auxiliary
// array or cuco::static_map (at the expense of additional memory)
auto it = thrust::lower_bound(
thrust::seq, *dcs_nzd_vertices_, *dcs_nzd_vertices_ + *dcs_nzd_vertex_count_, major);
return it != *dcs_nzd_vertices_ + *dcs_nzd_vertex_count_
? (*it == major ? thrust::optional<vertex_t>{static_cast<vertex_t>(
thrust::distance(*dcs_nzd_vertices_, it))}
: thrust::nullopt)
: thrust::nullopt;
} else {
return thrust::nullopt;
}
}

// major_hypersparse_idx: index within the hypersparse segment
__host__ __device__ thrust::optional<vertex_t> get_major_from_major_hypersparse_idx_nocheck(
vertex_t major_hypersparse_idx) const noexcept
{
return dcs_nzd_vertices_
? thrust::optional<vertex_t>{(*dcs_nzd_vertices_)[major_hypersparse_idx]}
: thrust::nullopt;
}

__host__ __device__ vertex_t
get_minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept
{
Expand All @@ -159,6 +194,15 @@ class matrix_partition_device_view_t<vertex_t,
return major_value_start_offset_;
}

__host__ __device__ thrust::optional<vertex_t const*> get_dcs_nzd_vertices() const
{
return dcs_nzd_vertices_;
}
__host__ __device__ thrust::optional<vertex_t> get_dcs_nzd_vertex_count() const
{
return dcs_nzd_vertex_count_;
}

private:
// should be trivially copyable to device

Expand Down Expand Up @@ -220,12 +264,39 @@ class matrix_partition_device_view_t<vertex_t,
return major_offset;
}

// major_hypersparse_idx: index within the hypersparse segment
__host__ __device__ thrust::optional<vertex_t> get_major_hypersparse_idx_from_major_nocheck(
vertex_t major) const noexcept
{
assert(false);
return thrust::nullopt;
}

// major_hypersparse_idx: index within the hypersparse segment
__host__ __device__ thrust::optional<vertex_t> get_major_from_major_hypersparse_idx_nocheck(
vertex_t major_hypersparse_idx) const noexcept
{
assert(false);
return thrust::nullopt;
}

__host__ __device__ vertex_t
get_minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept
{
return minor_offset;
}

__host__ __device__ thrust::optional<vertex_t const*> get_dcs_nzd_vertices() const
{
assert(false);
return thrust::nullopt;
}
__host__ __device__ thrust::optional<vertex_t> get_dcs_nzd_vertex_count() const
{
assert(false);
return thrust::nullopt;
}

private:
vertex_t number_of_vertices_;
};
Expand Down
3 changes: 3 additions & 0 deletions cpp/include/cugraph/matrix_partition_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ class matrix_partition_view_t<vertex_t, edge_t, weight_t, multi_gpu, std::enable
{
}

std::optional<vertex_t const*> get_dcs_nzd_vertices() const { return std::nullopt; }
std::optional<vertex_t> get_dcs_nzd_vertex_count() const { return std::nullopt; }

vertex_t get_major_first() const { return vertex_t{0}; }
vertex_t get_major_last() const { return number_of_vertices_; }
vertex_t get_minor_first() const { return vertex_t{0}; }
Expand Down
Loading

0 comments on commit 04f73b8

Please sign in to comment.