From 02189e8f72ac3d23539ece0fa8f94c65b3e0aa26 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Thu, 23 Mar 2023 16:02:30 -0700 Subject: [PATCH 01/53] first commit --- .../neighbors/detail/connect_components.cuh | 161 +++++++++++++----- 1 file changed, 121 insertions(+), 40 deletions(-) diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 583ff4dfdc..6a6014ef98 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include @@ -27,7 +27,7 @@ #include #include - +#include #include #include @@ -167,6 +167,20 @@ struct LookupColorOp { } }; +template +__global__ void copy_sorted_kernel(const value_t* X, + value_t* X_cpy, + value_idx* src_indices, + size_t n_rows, + raft::util::FastIntDiv n_cols) +{ + value_idx tid = blockDim.x * blockIdx.x + threadIdx.x; + if (tid >= n_rows * n_cols.d) return; + value_idx row = tid / n_cols; + value_idx col = tid % n_cols; + X_cpy[tid] = X[src_indices[row] * n_cols.d + col]; +} + /** * Compute the cross-component 1-nearest neighbors for each row in X using * the given array of components @@ -181,39 +195,104 @@ struct LookupColorOp { * @param[in] n_cols number of columns in original dense data * @param[in] stream cuda stream for which to order cuda operations */ -template -void perform_1nn(raft::KeyValuePair* kvp, +template +void perform_1nn(raft::device_resources const& handle, + raft::KeyValuePair* kvp, value_idx* nn_colors, value_idx* colors, + value_idx* src_indices, const value_t* X, size_t n_rows, - size_t n_cols, - cudaStream_t stream, - red_op reduction_op) + size_t n_cols) { - rmm::device_uvector workspace(n_rows, stream); - rmm::device_uvector x_norm(n_rows, stream); - - raft::linalg::rowNorm(x_norm.data(), X, n_cols, n_rows, raft::linalg::L2Norm, true, stream); - - raft::distance::fusedL2NN, value_idx>( - kvp, - X, - X, - x_norm.data(), - x_norm.data(), - n_rows, - n_rows, - n_cols, - workspace.data(), - reduction_op, - reduction_op, - true, - true, - stream); + auto stream = handle.get_stream(); + raft::print_device_vector("colors_before_sort", colors, n_rows, std::cout); + raft::print_device_vector("X_before_sort", X, 30, std::cout); + raft::print_device_vector("src_indices_before_sort", src_indices, n_rows, std::cout); + // Sort data points by color + thrust::sort_by_key(handle.get_thrust_policy(), colors, colors + n_rows, thrust::make_zip_iterator(thrust::make_tuple(src_indices))); + + auto X_cpy = raft::make_device_matrix(handle, n_rows, n_cols); + copy_sorted_kernel<<>>(X, + X_cpy.data_handle(), + src_indices, + n_rows, + raft::util::FastIntDiv(n_cols)); + + raft::print_device_vector("colors_after_sort", colors, n_rows, std::cout); + raft::print_device_vector("X_after_sort", X, 30, std::cout); + raft::print_device_vector("src_indices_after_sort", src_indices, n_rows, std::cout); + auto x_norm = raft::make_device_vector(handle, n_rows); + // rmm::device_uvector x_norm(n_rows, stream); + + raft::linalg::rowNorm(x_norm.data_handle(), X_cpy.data_handle(), n_cols, n_rows, raft::linalg::L2Norm, true, stream); + + // auto x_norm_view = raft::make_device_vector_view(x_norm.data(), n_rows); + value_idx n_components = get_n_components(colors, n_rows, stream); + auto colors_group_idxs = raft::make_device_vector (handle, n_components + 1); + raft::sparse::convert::sorted_coo_to_csr(colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); + RAFT_LOG_INFO("n_comps %d", n_components); + + raft::print_device_vector("colors", colors, n_rows, std::cout); + raft::print_device_vector("colors_csr", colors_group_idxs.data_handle(), n_components + 1, std::cout); + // auto colors_group_idxs_view = raft::make_device_vector_view(colors_group_idxs.data(), n_components); + auto adj = raft::make_device_matrix (handle, n_rows, n_components); + // rmm::device_uvectoradj(adj_size, stream); + auto adj_iterator = thrust::make_counting_iterator(0); + auto mask_op = [colors, n_components = raft::util::FastIntDiv(n_components), adj = adj.data_handle()] __device__(value_idx idx) { + value_idx row = idx / n_components; + value_idx col = idx % n_components; + return colors[row] != col; + }; + thrust::transform(handle.get_thrust_policy(), + adj_iterator, + adj_iterator + n_rows * n_components, + adj.data_handle(), + mask_op); + handle.sync_stream(stream); + raft::print_device_vector("adj", adj.data_handle(), 30, std::cout); + // auto adj_view = raft::make_device_matrix_view(adj.data(), n_rows, n_components); + auto kvp_view = raft::make_device_vector_view, value_idx>(kvp, n_rows); + using DataT = value_t; + using IdxT = value_idx; + using OutT = raft::KeyValuePair; + using RedOpT = raft::distance::MinAndDistanceReduceOp; + using PairRedOpT = raft::distance::KVPMinReduce; + using ParamT = raft::distance::masked_l2_nn_params; + ParamT params{ + /** Reduction operator in the epilogue */ + RedOpT{}, + + /** Reduction operation on key value pairs */ + PairRedOpT{}, + /** Whether the output `minDist` should contain L2-sqrt */ + true, + /** Whether to initialize the output buffer before the main kernel launch */ + false +}; + raft::distance::masked_l2_nn(handle, + params, + raft::make_const_mdspan(X_cpy.view()), + raft::make_const_mdspan(X_cpy.view()), + raft::make_const_mdspan(x_norm.view()), + raft::make_const_mdspan(x_norm.view()), + raft::make_const_mdspan(adj.view()), + raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components), + kvp_view); + + handle.sync_stream(stream); + RAFT_LOG_INFO("Done until masked l2 distance"); LookupColorOp extract_colors_op(colors); thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors, extract_colors_op); + auto fetch_neighbor_indices_op = [kvp, src_indices]__device__ (auto t) { + thrust::get<0>(t).key = src_indices[thrust::get<0>(t).key]; + }; + auto kvp_iterator = thrust::make_zip_iterator(thrust::make_tuple(kvp)); + thrust::for_each(rmm::exec_policy(stream), kvp_iterator, kvp_iterator + n_rows, fetch_neighbor_indices_op); + + handle.sync_stream(stream); + RAFT_LOG_INFO("Done until fetching neighbor indices"); } /** @@ -236,13 +315,9 @@ void sort_by_color(value_idx* colors, size_t n_rows, cudaStream_t stream) { - thrust::counting_iterator arg_sort_iter(0); - thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows, src_indices); - auto keys = thrust::make_zip_iterator( thrust::make_tuple(colors, nn_colors, (KeyValuePair*)kvp)); auto vals = thrust::make_zip_iterator(thrust::make_tuple(src_indices)); - // get all the colors in contiguous locations so we can map them to warps. thrust::sort_by_key(rmm::exec_policy(stream), keys, keys + n_rows, vals, TupleComp()); } @@ -341,8 +416,6 @@ void connect_components( // Normalize colors so they are drawn from a monotonically increasing set raft::label::make_monotonic(colors.data(), colors.data(), n_rows, stream, true); - value_idx n_components = get_n_components(colors.data(), n_rows, stream); - /** * First compute 1-nn for all colors where the color of each data point * is guaranteed to be != color of its nearest neighbor. @@ -351,14 +424,17 @@ void connect_components( rmm::device_uvector> temp_inds_dists(n_rows, stream); rmm::device_uvector src_indices(n_rows, stream); - perform_1nn(temp_inds_dists.data(), - nn_colors.data(), + thrust::counting_iterator arg_sort_iter(0); + thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows, src_indices); + + perform_1nn(handle, + temp_inds_dists.data(), + nn_colors.data(), colors.data(), - X, - n_rows, - n_cols, - stream, - reduction_op); + src_indices.data(), + X, + n_rows, + n_cols); /** * Sort data points by color (neighbors are not sorted) @@ -376,10 +452,15 @@ void connect_components( raft::sparse::op::compute_duplicates_mask( out_index.data(), colors.data(), nn_colors.data(), n_rows, stream); + raft::print_device_vector("colors", colors.data(), n_rows, std::cout); + raft::print_device_vector("nn_colors", nn_colors.data(), n_rows, std::cout); + raft::print_device_vector("out_index", out_index.data(), n_rows, std::cout); + thrust::exclusive_scan(handle.get_thrust_policy(), out_index.data(), out_index.data() + out_index.size(), out_index.data()); + raft::print_device_vector("out_index", out_index.data(), n_rows, std::cout); // compute final size value_idx size = 0; From 7c86e6ededb89a1b20f61fee621e58af06cce1e2 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Thu, 30 Mar 2023 13:37:18 -0700 Subject: [PATCH 02/53] more changes --- cpp/include/raft/cluster/detail/mst.cuh | 6 + .../neighbors/detail/connect_components.cuh | 111 +++++++++--------- 2 files changed, 63 insertions(+), 54 deletions(-) diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh index 46e31b672e..7007031fe0 100644 --- a/cpp/include/raft/cluster/detail/mst.cuh +++ b/cpp/include/raft/cluster/detail/mst.cuh @@ -80,6 +80,12 @@ void connect_knn_graph( raft::sparse::COO connected_edges(stream); + rmm::device_uvector src_indices(m, stream); + thrust::counting_iterator arg_sort_iter(0); + thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + m, src_indices.data()); + + auto tuple_it = thrust::make_zip_iterator(thrust::make_tuple(src_indices, reduction_op.core_dists)); + thrust::sort_by_key(handle.get_thrust_policy(), color, m, tuple_it); raft::sparse::neighbors::connect_components( handle, connected_edges, X, color, m, n, reduction_op); diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 6a6014ef98..6f64c84000 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -56,15 +56,14 @@ namespace raft::sparse::neighbors::detail { */ template struct FixConnectivitiesRedOp { - value_idx* colors; value_idx m; - FixConnectivitiesRedOp(value_idx* colors_, value_idx m_) : colors(colors_), m(m_){}; + FixConnectivitiesRedOp(value_idx m_) : m(m_){}; typedef typename raft::KeyValuePair KVP; DI void operator()(value_idx rit, KVP* out, const KVP& other) { - if (rit < m && other.value < out->value && colors[rit] != colors[other.key]) { + if (rit < m && other.value < out->value) { out->key = other.key; out->value = other.value; } @@ -74,7 +73,7 @@ struct FixConnectivitiesRedOp { operator()(value_idx rit, const KVP& a, const KVP& b) { - if (rit < m && a.value < b.value && colors[rit] != colors[a.key]) { + if (rit < m && a.value < b.value) { return a; } else return b; @@ -195,7 +194,7 @@ __global__ void copy_sorted_kernel(const value_t* X, * @param[in] n_cols number of columns in original dense data * @param[in] stream cuda stream for which to order cuda operations */ -template +template void perform_1nn(raft::device_resources const& handle, raft::KeyValuePair* kvp, value_idx* nn_colors, @@ -203,42 +202,37 @@ void perform_1nn(raft::device_resources const& handle, value_idx* src_indices, const value_t* X, size_t n_rows, - size_t n_cols) + size_t n_cols, + red_op reduction_op) { auto stream = handle.get_stream(); - raft::print_device_vector("colors_before_sort", colors, n_rows, std::cout); - raft::print_device_vector("X_before_sort", X, 30, std::cout); - raft::print_device_vector("src_indices_before_sort", src_indices, n_rows, std::cout); + // raft::print_device_vector("colors_before_sort", colors, n_rows, std::cout); + // raft::print_device_vector("X_before_sort", X, 50, std::cout); + // raft::print_device_vector("src_indices_before_sort", src_indices, n_rows, std::cout); // Sort data points by color - thrust::sort_by_key(handle.get_thrust_policy(), colors, colors + n_rows, thrust::make_zip_iterator(thrust::make_tuple(src_indices))); - - auto X_cpy = raft::make_device_matrix(handle, n_rows, n_cols); - copy_sorted_kernel<<>>(X, - X_cpy.data_handle(), - src_indices, - n_rows, - raft::util::FastIntDiv(n_cols)); - - raft::print_device_vector("colors_after_sort", colors, n_rows, std::cout); - raft::print_device_vector("X_after_sort", X, 30, std::cout); - raft::print_device_vector("src_indices_after_sort", src_indices, n_rows, std::cout); + // thrust::sort_by_key(handle.get_thrust_policy(), colors, colors + n_rows, thrust::make_zip_iterator(thrust::make_tuple(src_indices))); + + // auto X_cpy = raft::make_device_matrix(handle, n_rows, n_cols); + // copy_sorted_kernel<<>>(X, + // X_cpy.data_handle(), + // src_indices, + // n_rows, + // raft::util::FastIntDiv(n_cols)); + // raft::print_device_vector("colors_after_sort", colors, n_rows, std::cout); + // raft::print_device_vector("X_after_sort", X_cpy.data_handle(), 50, std::cout); + // raft::print_device_vector("src_indices_after_sort", src_indices, n_rows, std::cout); auto x_norm = raft::make_device_vector(handle, n_rows); - // rmm::device_uvector x_norm(n_rows, stream); - raft::linalg::rowNorm(x_norm.data_handle(), X_cpy.data_handle(), n_cols, n_rows, raft::linalg::L2Norm, true, stream); + raft::linalg::rowNorm(x_norm.data_handle(), X, n_cols, n_rows, raft::linalg::L2Norm, true, stream); - // auto x_norm_view = raft::make_device_vector_view(x_norm.data(), n_rows); value_idx n_components = get_n_components(colors, n_rows, stream); auto colors_group_idxs = raft::make_device_vector (handle, n_components + 1); raft::sparse::convert::sorted_coo_to_csr(colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); - RAFT_LOG_INFO("n_comps %d", n_components); raft::print_device_vector("colors", colors, n_rows, std::cout); raft::print_device_vector("colors_csr", colors_group_idxs.data_handle(), n_components + 1, std::cout); - // auto colors_group_idxs_view = raft::make_device_vector_view(colors_group_idxs.data(), n_components); auto adj = raft::make_device_matrix (handle, n_rows, n_components); - // rmm::device_uvectoradj(adj_size, stream); auto adj_iterator = thrust::make_counting_iterator(0); auto mask_op = [colors, n_components = raft::util::FastIntDiv(n_components), adj = adj.data_handle()] __device__(value_idx idx) { value_idx row = idx / n_components; @@ -254,30 +248,24 @@ void perform_1nn(raft::device_resources const& handle, raft::print_device_vector("adj", adj.data_handle(), 30, std::cout); // auto adj_view = raft::make_device_matrix_view(adj.data(), n_rows, n_components); auto kvp_view = raft::make_device_vector_view, value_idx>(kvp, n_rows); - using DataT = value_t; - using IdxT = value_idx; - using OutT = raft::KeyValuePair; - using RedOpT = raft::distance::MinAndDistanceReduceOp; - using PairRedOpT = raft::distance::KVPMinReduce; - using ParamT = raft::distance::masked_l2_nn_params; + using OutT = raft::KeyValuePair; + // using RedOpT = raft::distance::MinAndDistanceReduceOp; + // using PairRedOpT = raft::distance::KVPMinReduce; + using ParamT = raft::distance::masked_l2_nn_params; + // raft::distance::initialize, int>( + // handle, kvp, n_rows, std::numeric_limits::max(), RedOpT{}); ParamT params{ - /** Reduction operator in the epilogue */ - RedOpT{}, - - /** Reduction operation on key value pairs */ - PairRedOpT{}, - /** Whether the output `minDist` should contain L2-sqrt */ - true, - /** Whether to initialize the output buffer before the main kernel launch */ - false -}; - raft::distance::masked_l2_nn(handle, + reduction_op, + reduction_op, + true, + true}; + raft::distance::masked_l2_nn(handle, params, - raft::make_const_mdspan(X_cpy.view()), - raft::make_const_mdspan(X_cpy.view()), - raft::make_const_mdspan(x_norm.view()), - raft::make_const_mdspan(x_norm.view()), - raft::make_const_mdspan(adj.view()), + X, + X, + x_norm.view(), + x_norm.view(), + adj.view(), raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components), kvp_view); @@ -285,10 +273,24 @@ void perform_1nn(raft::device_resources const& handle, RAFT_LOG_INFO("Done until masked l2 distance"); LookupColorOp extract_colors_op(colors); thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors, extract_colors_op); + + raft::print_device_vector("nn_colors", nn_colors, n_rows, std::cout); auto fetch_neighbor_indices_op = [kvp, src_indices]__device__ (auto t) { thrust::get<0>(t).key = src_indices[thrust::get<0>(t).key]; }; auto kvp_iterator = thrust::make_zip_iterator(thrust::make_tuple(kvp)); + rmm::device_uvector nbrs(n_rows, stream); + rmm::device_uvector dists(n_rows, stream); + + auto nbrs_it = thrust::make_zip_iterator(thrust::make_tuple(kvp, nbrs.data(), dists.data())); + thrust::for_each(handle.get_thrust_policy(), nbrs_it, nbrs_it + n_rows, [=]__device__ (auto t) { + thrust::get<1>(t) = thrust::get<0>(t).key; + thrust::get<2>(t) = thrust::get<0>(t).value; + }); + handle.sync_stream(stream); + raft::print_device_vector("nbrs_original", nbrs.data(), n_rows, std::cout); + raft::print_device_vector("nbrs_dists", dists.data(), n_rows, std::cout); + thrust::for_each(rmm::exec_policy(stream), kvp_iterator, kvp_iterator + n_rows, fetch_neighbor_indices_op); handle.sync_stream(stream); @@ -315,6 +317,9 @@ void sort_by_color(value_idx* colors, size_t n_rows, cudaStream_t stream) { + thrust::counting_iterator arg_sort_iter(0); + thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows, src_indices); + auto keys = thrust::make_zip_iterator( thrust::make_tuple(colors, nn_colors, (KeyValuePair*)kvp)); auto vals = thrust::make_zip_iterator(thrust::make_tuple(src_indices)); @@ -424,17 +429,15 @@ void connect_components( rmm::device_uvector> temp_inds_dists(n_rows, stream); rmm::device_uvector src_indices(n_rows, stream); - thrust::counting_iterator arg_sort_iter(0); - thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows, src_indices); - perform_1nn(handle, temp_inds_dists.data(), nn_colors.data(), colors.data(), - src_indices.data(), + src_indices, X, n_rows, - n_cols); + n_cols, + reduction_op); /** * Sort data points by color (neighbors are not sorted) From b26cda275772537c31d41f848da0edb0abc30e18 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Thu, 20 Apr 2023 15:50:17 -0700 Subject: [PATCH 03/53] sorting impl --- cpp/include/raft/cluster/detail/mst.cuh | 72 +++++++++++++++++-- .../neighbors/detail/connect_components.cuh | 20 ------ 2 files changed, 66 insertions(+), 26 deletions(-) diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh index 7007031fe0..236802f766 100644 --- a/cpp/include/raft/cluster/detail/mst.cuh +++ b/cpp/include/raft/cluster/detail/mst.cuh @@ -16,6 +16,10 @@ #pragma once +#include "raft/linalg/map.cuh" +#include "thrust/copy.h" +#include "thrust/iterator/counting_iterator.h" +#include "thrust/transform_reduce.h" #include #include @@ -24,11 +28,46 @@ #include #include +#include #include #include #include namespace raft::cluster::detail { +template +void batched_scatter(raft::device_resources const& handle, + value_t* X, + value_idx* map, + size_t m, + size_t n, + size_t batch_size){ + auto stream = handle.get_stream(); + auto exec_policy = handle.get_thrust_policy(); + + value_idx n_batches = raft::ceildiv((int)n, (int)batch_size); + + + + for(value_idx bid = 0; bid < n_batches; bid++) { + value_idx batch_offset = bid * batch_size; + value_idx cols_per_batch = min((value_idx)batch_size, (value_idx)n - batch_offset); + auto scratch_space = raft::make_device_vector(handle, m * batch_size); + + auto scatter_op = [X, batch_offset, n = raft::util::FastIntDiv(n), map]__device__(auto idx) { + value_idx row = idx / cols_per_batch; + value_idx col = idx % cols_per_batch; + return X[map[row] * n + batch_offset + col]; + }; + raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); + auto copy_op = [X, batch_offset, n = raft::util::FastIntDiv(n), map]__device__(auto idx) { + value_idx row = idx / cols_per_batch; + value_idx col = idx % cols_per_batch; + return X[row * n + batch_offset + col] = scratch_space[idx]; + }; + auto counting = thrust::make_counting_iterator(0); + thrust::for_each(exec_policy, counting, counting + m * batch_size, copy_op); + } +} template void merge_msts(sparse::solver::Graph_COO& coo1, @@ -68,7 +107,7 @@ void merge_msts(sparse::solver::Graph_COO& coo1, template void connect_knn_graph( raft::device_resources const& handle, - const value_t* X, + value_t* X, sparse::solver::Graph_COO& msf, size_t m, size_t n, @@ -77,18 +116,40 @@ void connect_knn_graph( raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded) { auto stream = handle.get_stream(); + auto exec_policy = handle.get_thrust_policy(); raft::sparse::COO connected_edges(stream); - rmm::device_uvector src_indices(m, stream); + rmm::device_uvector sort_plan(m, stream); thrust::counting_iterator arg_sort_iter(0); - thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + m, src_indices.data()); + thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + m, sort_plan.data()); + + thrust::sort_by_key(handle.get_thrust_policy(), color, color + m, sort_plan.data()); + reduction_op.rearrange(sort_plan.data()); + + // create inverse map for unsorting + auto counting = thrust::make_counting_iterator(0); + + rmm::device_uvector unsort_plan(m, stream); + auto inverse_map_op = [unsort_plan = unsort_plan.data()] __device__(auto t) { + unsort_plan[thrust::get<0>(t)] = thrust::get<1>(t); + return; + }; + + thrust::for_each( + exec_policy, + thrust::make_zip_iterator(thrust::make_tuple(sort_plan.data(), counting)), + thrust::make_zip_iterator(thrust::make_tuple(sort_plan.data() + m, counting + m)), + inverse_map_op); + + batched_scatter(handle, const_cast(X), sort_plan.data(), 16); - auto tuple_it = thrust::make_zip_iterator(thrust::make_tuple(src_indices, reduction_op.core_dists)); - thrust::sort_by_key(handle.get_thrust_policy(), color, m, tuple_it); raft::sparse::neighbors::connect_components( handle, connected_edges, X, color, m, n, reduction_op); + // unsort the input matrix + batched_scatter(handle, const_cast(X), unsort_plan.data(), 16); + rmm::device_uvector indptr2(m + 1, stream); raft::sparse::convert::sorted_coo_to_csr( connected_edges.rows(), connected_edges.nnz, indptr2.data(), m + 1, stream); @@ -104,7 +165,6 @@ void connect_knn_graph( connected_edges.nnz, color, stream, - false, false); merge_msts(msf, new_mst, stream); diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 6f64c84000..cb6fddae79 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -207,21 +207,6 @@ void perform_1nn(raft::device_resources const& handle, { auto stream = handle.get_stream(); - // raft::print_device_vector("colors_before_sort", colors, n_rows, std::cout); - // raft::print_device_vector("X_before_sort", X, 50, std::cout); - // raft::print_device_vector("src_indices_before_sort", src_indices, n_rows, std::cout); - // Sort data points by color - // thrust::sort_by_key(handle.get_thrust_policy(), colors, colors + n_rows, thrust::make_zip_iterator(thrust::make_tuple(src_indices))); - - // auto X_cpy = raft::make_device_matrix(handle, n_rows, n_cols); - // copy_sorted_kernel<<>>(X, - // X_cpy.data_handle(), - // src_indices, - // n_rows, - // raft::util::FastIntDiv(n_cols)); - // raft::print_device_vector("colors_after_sort", colors, n_rows, std::cout); - // raft::print_device_vector("X_after_sort", X_cpy.data_handle(), 50, std::cout); - // raft::print_device_vector("src_indices_after_sort", src_indices, n_rows, std::cout); auto x_norm = raft::make_device_vector(handle, n_rows); raft::linalg::rowNorm(x_norm.data_handle(), X, n_cols, n_rows, raft::linalg::L2Norm, true, stream); @@ -244,9 +229,7 @@ void perform_1nn(raft::device_resources const& handle, adj_iterator + n_rows * n_components, adj.data_handle(), mask_op); - handle.sync_stream(stream); raft::print_device_vector("adj", adj.data_handle(), 30, std::cout); - // auto adj_view = raft::make_device_matrix_view(adj.data(), n_rows, n_components); auto kvp_view = raft::make_device_vector_view, value_idx>(kvp, n_rows); using OutT = raft::KeyValuePair; // using RedOpT = raft::distance::MinAndDistanceReduceOp; @@ -269,7 +252,6 @@ void perform_1nn(raft::device_resources const& handle, raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components), kvp_view); - handle.sync_stream(stream); RAFT_LOG_INFO("Done until masked l2 distance"); LookupColorOp extract_colors_op(colors); thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors, extract_colors_op); @@ -287,13 +269,11 @@ void perform_1nn(raft::device_resources const& handle, thrust::get<1>(t) = thrust::get<0>(t).key; thrust::get<2>(t) = thrust::get<0>(t).value; }); - handle.sync_stream(stream); raft::print_device_vector("nbrs_original", nbrs.data(), n_rows, std::cout); raft::print_device_vector("nbrs_dists", dists.data(), n_rows, std::cout); thrust::for_each(rmm::exec_policy(stream), kvp_iterator, kvp_iterator + n_rows, fetch_neighbor_indices_op); - handle.sync_stream(stream); RAFT_LOG_INFO("Done until fetching neighbor indices"); } From e48c486d14d0e0165d873360898655a8805daf70 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 21 Apr 2023 10:39:08 -0700 Subject: [PATCH 04/53] Gather function --- cpp/include/raft/cluster/detail/mst.cuh | 1 + .../raft/sparse/neighbors/detail/connect_components.cuh | 8 ++------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh index 236802f766..3912074e02 100644 --- a/cpp/include/raft/cluster/detail/mst.cuh +++ b/cpp/include/raft/cluster/detail/mst.cuh @@ -22,6 +22,7 @@ #include "thrust/transform_reduce.h" #include #include +#include #include #include diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index cb6fddae79..9c055a2394 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -219,16 +219,12 @@ void perform_1nn(raft::device_resources const& handle, raft::print_device_vector("colors_csr", colors_group_idxs.data_handle(), n_components + 1, std::cout); auto adj = raft::make_device_matrix (handle, n_rows, n_components); auto adj_iterator = thrust::make_counting_iterator(0); - auto mask_op = [colors, n_components = raft::util::FastIntDiv(n_components), adj = adj.data_handle()] __device__(value_idx idx) { + auto mask_op = [colors, n_components = raft::util::FastIntDiv(n_components)] __device__(value_idx idx) { value_idx row = idx / n_components; value_idx col = idx % n_components; return colors[row] != col; }; - thrust::transform(handle.get_thrust_policy(), - adj_iterator, - adj_iterator + n_rows * n_components, - adj.data_handle(), - mask_op); + raft::linalg::map_offset(handle, adj.view(), mask_op); raft::print_device_vector("adj", adj.data_handle(), 30, std::cout); auto kvp_view = raft::make_device_vector_view, value_idx>(kvp, n_rows); using OutT = raft::KeyValuePair; From 89caefb19c7528d80b00afbfdf795d8b8496ad1a Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 21 Apr 2023 18:50:03 -0700 Subject: [PATCH 05/53] Updated with batch --- cpp/include/raft/cluster/detail/mst.cuh | 73 +-------- .../raft/cluster/detail/single_linkage.cuh | 2 +- .../neighbors/detail/connect_components.cuh | 146 ++++++++++++------ .../sparse/neighbors/connect_components.cu | 2 +- 4 files changed, 108 insertions(+), 115 deletions(-) diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh index 3912074e02..22fdc5a86c 100644 --- a/cpp/include/raft/cluster/detail/mst.cuh +++ b/cpp/include/raft/cluster/detail/mst.cuh @@ -16,59 +16,19 @@ #pragma once -#include "raft/linalg/map.cuh" -#include "thrust/copy.h" -#include "thrust/iterator/counting_iterator.h" -#include "thrust/transform_reduce.h" #include #include -#include #include #include #include #include -#include #include #include #include namespace raft::cluster::detail { -template -void batched_scatter(raft::device_resources const& handle, - value_t* X, - value_idx* map, - size_t m, - size_t n, - size_t batch_size){ - auto stream = handle.get_stream(); - auto exec_policy = handle.get_thrust_policy(); - - value_idx n_batches = raft::ceildiv((int)n, (int)batch_size); - - - - for(value_idx bid = 0; bid < n_batches; bid++) { - value_idx batch_offset = bid * batch_size; - value_idx cols_per_batch = min((value_idx)batch_size, (value_idx)n - batch_offset); - auto scratch_space = raft::make_device_vector(handle, m * batch_size); - - auto scatter_op = [X, batch_offset, n = raft::util::FastIntDiv(n), map]__device__(auto idx) { - value_idx row = idx / cols_per_batch; - value_idx col = idx % cols_per_batch; - return X[map[row] * n + batch_offset + col]; - }; - raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); - auto copy_op = [X, batch_offset, n = raft::util::FastIntDiv(n), map]__device__(auto idx) { - value_idx row = idx / cols_per_batch; - value_idx col = idx % cols_per_batch; - return X[row * n + batch_offset + col] = scratch_space[idx]; - }; - auto counting = thrust::make_counting_iterator(0); - thrust::for_each(exec_policy, counting, counting + m * batch_size, copy_op); - } -} template void merge_msts(sparse::solver::Graph_COO& coo1, @@ -108,7 +68,7 @@ void merge_msts(sparse::solver::Graph_COO& coo1, template void connect_knn_graph( raft::device_resources const& handle, - value_t* X, + const value_t* X, sparse::solver::Graph_COO& msf, size_t m, size_t n, @@ -117,40 +77,12 @@ void connect_knn_graph( raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded) { auto stream = handle.get_stream(); - auto exec_policy = handle.get_thrust_policy(); raft::sparse::COO connected_edges(stream); - rmm::device_uvector sort_plan(m, stream); - thrust::counting_iterator arg_sort_iter(0); - thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + m, sort_plan.data()); - - thrust::sort_by_key(handle.get_thrust_policy(), color, color + m, sort_plan.data()); - reduction_op.rearrange(sort_plan.data()); - - // create inverse map for unsorting - auto counting = thrust::make_counting_iterator(0); - - rmm::device_uvector unsort_plan(m, stream); - auto inverse_map_op = [unsort_plan = unsort_plan.data()] __device__(auto t) { - unsort_plan[thrust::get<0>(t)] = thrust::get<1>(t); - return; - }; - - thrust::for_each( - exec_policy, - thrust::make_zip_iterator(thrust::make_tuple(sort_plan.data(), counting)), - thrust::make_zip_iterator(thrust::make_tuple(sort_plan.data() + m, counting + m)), - inverse_map_op); - - batched_scatter(handle, const_cast(X), sort_plan.data(), 16); - raft::sparse::neighbors::connect_components( handle, connected_edges, X, color, m, n, reduction_op); - // unsort the input matrix - batched_scatter(handle, const_cast(X), unsort_plan.data(), 16); - rmm::device_uvector indptr2(m + 1, stream); raft::sparse::convert::sorted_coo_to_csr( connected_edges.rows(), connected_edges.nnz, indptr2.data(), m + 1, stream); @@ -166,6 +98,7 @@ void connect_knn_graph( connected_edges.nnz, color, stream, + false, false); merge_msts(msf, new_mst, stream); @@ -258,4 +191,4 @@ void build_sorted_mst( raft::copy_async(mst_weight, mst_coo.weights.data(), mst_coo.n_edges, stream); } -}; // namespace raft::cluster::detail +}; // namespace raft::cluster::detail \ No newline at end of file diff --git a/cpp/include/raft/cluster/detail/single_linkage.cuh b/cpp/include/raft/cluster/detail/single_linkage.cuh index 473d858827..a2f24accdd 100644 --- a/cpp/include/raft/cluster/detail/single_linkage.cuh +++ b/cpp/include/raft/cluster/detail/single_linkage.cuh @@ -80,7 +80,7 @@ void single_linkage(raft::device_resources const& handle, * 2. Construct MST, sorted by weights */ rmm::device_uvector color(m, stream); - raft::sparse::neighbors::FixConnectivitiesRedOp op(color.data(), m); + raft::sparse::neighbors::FixConnectivitiesRedOp op(m); detail::build_sorted_mst(handle, X, indptr.data(), diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 9c055a2394..56ac6953be 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -85,6 +85,12 @@ struct FixConnectivitiesRedOp { out->key = -1; out->value = maxVal; } + + void gather(value_idx* map) { + } + + void scatter(value_idx* map) { + } }; /** @@ -167,17 +173,70 @@ struct LookupColorOp { }; template -__global__ void copy_sorted_kernel(const value_t* X, - value_t* X_cpy, - value_idx* src_indices, - size_t n_rows, - raft::util::FastIntDiv n_cols) -{ - value_idx tid = blockDim.x * blockIdx.x + threadIdx.x; - if (tid >= n_rows * n_cols.d) return; - value_idx row = tid / n_cols; - value_idx col = tid % n_cols; - X_cpy[tid] = X[src_indices[row] * n_cols.d + col]; +void batched_gather(raft::device_resources const& handle, + value_t* X, + value_idx* map, + size_t m, + size_t n, + size_t batch_size){ + auto stream = handle.get_stream(); + auto exec_policy = handle.get_thrust_policy(); + + value_idx n_batches = raft::ceildiv((int)n, (int)batch_size); + + for(value_idx bid = 0; bid < n_batches; bid++) { + value_idx batch_offset = bid * batch_size; + value_idx cols_per_batch = min((value_idx)batch_size, (value_idx)n - bid * batch_offset); + auto scratch_space = raft::make_device_vector(handle, m * cols_per_batch); + + auto scatter_op = [X, map, batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n]__device__(auto idx) { + value_idx row = idx / cols_per_batch; + value_idx col = idx % cols_per_batch; + return X[map[row] * n + batch_offset + col]; + }; + raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); + auto copy_op = [X, map, scratch_space = scratch_space.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n]__device__(auto idx) { + value_idx row = idx / cols_per_batch; + value_idx col = idx % cols_per_batch; + return X[row * n + batch_offset + col] = scratch_space[idx]; + }; + auto counting = thrust::make_counting_iterator(0); + thrust::for_each(exec_policy, counting, counting + m * batch_size, copy_op); + } +} + +template +void batched_scatter(raft::device_resources const& handle, + value_t* X, + value_idx* map, + size_t m, + size_t n, + size_t batch_size) { + + auto stream = handle.get_stream(); + auto exec_policy = handle.get_thrust_policy(); + + value_idx n_batches = raft::ceildiv((int)n, (int)batch_size); + + for(value_idx bid = 0; bid < n_batches; bid++) { + value_idx batch_offset = bid * batch_size; + value_idx cols_per_batch = min((value_idx)batch_size, (value_idx)n - bid * batch_offset); + auto scratch_space = raft::make_device_vector(handle, m * cols_per_batch); + + auto scatter_op = [X, map, batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n]__device__(auto idx) { + value_idx row = idx / cols_per_batch; + value_idx col = idx % cols_per_batch; + return X[row * n + batch_offset + col]; + }; + raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); + auto copy_op = [X, map, scratch_space = scratch_space.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n]__device__(auto idx) { + value_idx row = idx / cols_per_batch; + value_idx col = idx % cols_per_batch; + X[map[row] * n + batch_offset + col] = scratch_space[idx]; + }; + auto counting = thrust::make_counting_iterator(0); + thrust::for_each(exec_policy, counting, counting + m * batch_size, copy_op); + } } /** @@ -199,7 +258,6 @@ void perform_1nn(raft::device_resources const& handle, raft::KeyValuePair* kvp, value_idx* nn_colors, value_idx* colors, - value_idx* src_indices, const value_t* X, size_t n_rows, size_t n_cols, @@ -218,7 +276,6 @@ void perform_1nn(raft::device_resources const& handle, raft::print_device_vector("colors", colors, n_rows, std::cout); raft::print_device_vector("colors_csr", colors_group_idxs.data_handle(), n_components + 1, std::cout); auto adj = raft::make_device_matrix (handle, n_rows, n_components); - auto adj_iterator = thrust::make_counting_iterator(0); auto mask_op = [colors, n_components = raft::util::FastIntDiv(n_components)] __device__(value_idx idx) { value_idx row = idx / n_components; value_idx col = idx % n_components; @@ -228,20 +285,17 @@ void perform_1nn(raft::device_resources const& handle, raft::print_device_vector("adj", adj.data_handle(), 30, std::cout); auto kvp_view = raft::make_device_vector_view, value_idx>(kvp, n_rows); using OutT = raft::KeyValuePair; - // using RedOpT = raft::distance::MinAndDistanceReduceOp; - // using PairRedOpT = raft::distance::KVPMinReduce; using ParamT = raft::distance::masked_l2_nn_params; - // raft::distance::initialize, int>( - // handle, kvp, n_rows, std::numeric_limits::max(), RedOpT{}); ParamT params{ reduction_op, reduction_op, true, true}; + auto X_view = raft::make_device_matrix_view(X, n_rows, n_cols); raft::distance::masked_l2_nn(handle, params, - X, - X, + X_view, + X_view, x_norm.view(), x_norm.view(), adj.view(), @@ -253,24 +307,6 @@ void perform_1nn(raft::device_resources const& handle, thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors, extract_colors_op); raft::print_device_vector("nn_colors", nn_colors, n_rows, std::cout); - auto fetch_neighbor_indices_op = [kvp, src_indices]__device__ (auto t) { - thrust::get<0>(t).key = src_indices[thrust::get<0>(t).key]; - }; - auto kvp_iterator = thrust::make_zip_iterator(thrust::make_tuple(kvp)); - rmm::device_uvector nbrs(n_rows, stream); - rmm::device_uvector dists(n_rows, stream); - - auto nbrs_it = thrust::make_zip_iterator(thrust::make_tuple(kvp, nbrs.data(), dists.data())); - thrust::for_each(handle.get_thrust_policy(), nbrs_it, nbrs_it + n_rows, [=]__device__ (auto t) { - thrust::get<1>(t) = thrust::get<0>(t).key; - thrust::get<2>(t) = thrust::get<0>(t).value; - }); - raft::print_device_vector("nbrs_original", nbrs.data(), n_rows, std::cout); - raft::print_device_vector("nbrs_dists", dists.data(), n_rows, std::cout); - - thrust::for_each(rmm::exec_policy(stream), kvp_iterator, kvp_iterator + n_rows, fetch_neighbor_indices_op); - - RAFT_LOG_INFO("Done until fetching neighbor indices"); } /** @@ -309,6 +345,7 @@ __global__ void min_components_by_color_kernel(value_idx* out_rows, value_t* out_vals, const value_idx* out_index, const value_idx* indices, + const value_idx* sort_plan, const raft::KeyValuePair* kvp, size_t nnz) { @@ -319,8 +356,8 @@ __global__ void min_components_by_color_kernel(value_idx* out_rows, int idx = out_index[tid]; if ((tid == 0 || (out_index[tid - 1] != idx))) { - out_rows[idx] = indices[tid]; - out_cols[idx] = kvp[tid].key; + out_rows[idx] = sort_plan[indices[tid]]; + out_cols[idx] = sort_plan[kvp[tid].key]; out_vals[idx] = kvp[tid].value; } } @@ -343,6 +380,7 @@ template void min_components_by_color(raft::sparse::COO& coo, const value_idx* out_index, const value_idx* indices, + const value_idx* sort_plan, const raft::KeyValuePair* kvp, size_t nnz, cudaStream_t stream) @@ -353,7 +391,7 @@ void min_components_by_color(raft::sparse::COO& coo, * the min. */ min_components_by_color_kernel<<>>( - coo.rows(), coo.cols(), coo.vals(), out_index, indices, kvp, nnz); + coo.rows(), coo.cols(), coo.vals(), out_index, indices, sort_plan, kvp, nnz); } /** @@ -397,6 +435,22 @@ void connect_components( // Normalize colors so they are drawn from a monotonically increasing set raft::label::make_monotonic(colors.data(), colors.data(), n_rows, stream, true); + rmm::device_uvector sort_plan(n_rows, stream); + thrust::counting_iterator arg_sort_iter(0); + thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows, sort_plan.data()); + + thrust::sort_by_key(handle.get_thrust_policy(), colors.data(), colors.data() + n_rows, sort_plan.data()); + + // Modify the reduction operation based on the sort plan. This is particularly needed for HDBSCAN + reduction_op.gather(sort_plan.data()); + + batched_gather(handle, + const_cast(X), + sort_plan.data(), + n_rows, + n_cols, + n_cols); + /** * First compute 1-nn for all colors where the color of each data point * is guaranteed to be != color of its nearest neighbor. @@ -409,7 +463,6 @@ void connect_components( temp_inds_dists.data(), nn_colors.data(), colors.data(), - src_indices, X, n_rows, n_cols, @@ -452,8 +505,15 @@ void connect_components( min_edges.allocate(size, n_rows, n_rows, true, stream); min_components_by_color( - min_edges, out_index.data(), src_indices.data(), temp_inds_dists.data(), n_rows, stream); - + min_edges, out_index.data(), src_indices.data(), sort_plan.data(), temp_inds_dists.data(), n_rows, stream); + + batched_scatter(handle, + const_cast(X), + sort_plan.data(), + n_rows, + n_cols, + n_cols); + reduction_op.scatter(sort_plan.data()); /** * Symmetrize resulting edge list */ diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index d200744329..b556eb637d 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -105,7 +105,7 @@ class ConnectComponentsTest /** * 3. connect_components to fix connectivities */ - raft::linkage::FixConnectivitiesRedOp red_op(colors.data(), params.n_row); + raft::linkage::FixConnectivitiesRedOp red_op(params.n_row); raft::linkage::connect_components( handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, red_op); From 4ef8731ec72e4ca2ab9d963bf17ee0366c2fe5dc Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 24 Apr 2023 18:33:52 -0700 Subject: [PATCH 06/53] Working impl --- .../raft/sparse/neighbors/detail/connect_components.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 56ac6953be..33b625ccd0 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -86,10 +86,10 @@ struct FixConnectivitiesRedOp { out->value = maxVal; } - void gather(value_idx* map) { + void gather(raft::device_resources const& handle, value_idx* map) { } - void scatter(value_idx* map) { + void scatter(raft::device_resources const& handle, value_idx* map) { } }; @@ -442,7 +442,7 @@ void connect_components( thrust::sort_by_key(handle.get_thrust_policy(), colors.data(), colors.data() + n_rows, sort_plan.data()); // Modify the reduction operation based on the sort plan. This is particularly needed for HDBSCAN - reduction_op.gather(sort_plan.data()); + reduction_op.gather(handle, sort_plan.data()); batched_gather(handle, const_cast(X), @@ -513,7 +513,7 @@ void connect_components( n_rows, n_cols, n_cols); - reduction_op.scatter(sort_plan.data()); + reduction_op.scatter(handle, sort_plan.data()); /** * Symmetrize resulting edge list */ From aebbb3f8be91fe5ea219eafe51fd5c23936bd9c8 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Wed, 26 Apr 2023 15:05:36 -0700 Subject: [PATCH 07/53] Remove debug --- .../neighbors/detail/connect_components.cuh | 227 +++++++++--------- 1 file changed, 111 insertions(+), 116 deletions(-) diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 33b625ccd0..cf34a70f9c 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -86,11 +86,9 @@ struct FixConnectivitiesRedOp { out->value = maxVal; } - void gather(raft::device_resources const& handle, value_idx* map) { - } + void gather(raft::device_resources const& handle, value_idx* map) {} - void scatter(raft::device_resources const& handle, value_idx* map) { - } + void scatter(raft::device_resources const& handle, value_idx* map) {} }; /** @@ -174,30 +172,38 @@ struct LookupColorOp { template void batched_gather(raft::device_resources const& handle, - value_t* X, - value_idx* map, - size_t m, - size_t n, - size_t batch_size){ - auto stream = handle.get_stream(); + value_t* X, + value_idx* map, + size_t m, + size_t n, + size_t batch_size) +{ + auto stream = handle.get_stream(); auto exec_policy = handle.get_thrust_policy(); - value_idx n_batches = raft::ceildiv((int)n, (int)batch_size); + value_idx n_batches = raft::ceildiv((value_idx)n, (value_idx)batch_size); - for(value_idx bid = 0; bid < n_batches; bid++) { - value_idx batch_offset = bid * batch_size; + for (value_idx bid = 0; bid < n_batches; bid++) { + value_idx batch_offset = bid * batch_size; value_idx cols_per_batch = min((value_idx)batch_size, (value_idx)n - bid * batch_offset); auto scratch_space = raft::make_device_vector(handle, m * cols_per_batch); - auto scatter_op = [X, map, batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n]__device__(auto idx) { - value_idx row = idx / cols_per_batch; - value_idx col = idx % cols_per_batch; - return X[map[row] * n + batch_offset + col]; - }; + auto scatter_op = + [X, map, batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__( + auto idx) { + value_idx row = idx / cols_per_batch; + value_idx col = idx % cols_per_batch; + return X[map[row] * n + batch_offset + col]; + }; raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); - auto copy_op = [X, map, scratch_space = scratch_space.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n]__device__(auto idx) { - value_idx row = idx / cols_per_batch; - value_idx col = idx % cols_per_batch; + auto copy_op = [X, + map, + scratch_space = scratch_space.data_handle(), + batch_offset, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + n] __device__(auto idx) { + value_idx row = idx / cols_per_batch; + value_idx col = idx % cols_per_batch; return X[row * n + batch_offset + col] = scratch_space[idx]; }; auto counting = thrust::make_counting_iterator(0); @@ -207,31 +213,38 @@ void batched_gather(raft::device_resources const& handle, template void batched_scatter(raft::device_resources const& handle, - value_t* X, - value_idx* map, - size_t m, - size_t n, - size_t batch_size) { - - auto stream = handle.get_stream(); + value_t* X, + value_idx* map, + size_t m, + size_t n, + size_t batch_size) +{ + auto stream = handle.get_stream(); auto exec_policy = handle.get_thrust_policy(); - value_idx n_batches = raft::ceildiv((int)n, (int)batch_size); + value_idx n_batches = raft::ceildiv((value_idx)n, (value_idx)batch_size); - for(value_idx bid = 0; bid < n_batches; bid++) { - value_idx batch_offset = bid * batch_size; + for (value_idx bid = 0; bid < n_batches; bid++) { + value_idx batch_offset = bid * batch_size; value_idx cols_per_batch = min((value_idx)batch_size, (value_idx)n - bid * batch_offset); auto scratch_space = raft::make_device_vector(handle, m * cols_per_batch); - auto scatter_op = [X, map, batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n]__device__(auto idx) { - value_idx row = idx / cols_per_batch; - value_idx col = idx % cols_per_batch; - return X[row * n + batch_offset + col]; - }; + auto scatter_op = + [X, map, batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__( + auto idx) { + value_idx row = idx / cols_per_batch; + value_idx col = idx % cols_per_batch; + return X[row * n + batch_offset + col]; + }; raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); - auto copy_op = [X, map, scratch_space = scratch_space.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n]__device__(auto idx) { - value_idx row = idx / cols_per_batch; - value_idx col = idx % cols_per_batch; + auto copy_op = [X, + map, + scratch_space = scratch_space.data_handle(), + batch_offset, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + n] __device__(auto idx) { + value_idx row = idx / cols_per_batch; + value_idx col = idx % cols_per_batch; X[map[row] * n + batch_offset + col] = scratch_space[idx]; }; auto counting = thrust::make_counting_iterator(0); @@ -267,46 +280,43 @@ void perform_1nn(raft::device_resources const& handle, auto x_norm = raft::make_device_vector(handle, n_rows); - raft::linalg::rowNorm(x_norm.data_handle(), X, n_cols, n_rows, raft::linalg::L2Norm, true, stream); - - value_idx n_components = get_n_components(colors, n_rows, stream); - auto colors_group_idxs = raft::make_device_vector (handle, n_components + 1); - raft::sparse::convert::sorted_coo_to_csr(colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); + raft::linalg::rowNorm( + x_norm.data_handle(), X, n_cols, n_rows, raft::linalg::L2Norm, true, stream); + + value_idx n_components = get_n_components(colors, n_rows, stream); + auto colors_group_idxs = raft::make_device_vector(handle, n_components + 1); + raft::sparse::convert::sorted_coo_to_csr( + colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); + + auto adj = raft::make_device_matrix(handle, n_rows, n_components); + auto mask_op = [colors, + n_components = raft::util::FastIntDiv(n_components)] __device__(value_idx idx) { + value_idx row = idx / n_components; + value_idx col = idx % n_components; + return colors[row] != col; + }; + raft::linalg::map_offset(handle, adj.view(), mask_op); + auto kvp_view = + raft::make_device_vector_view, value_idx>(kvp, n_rows); + using OutT = raft::KeyValuePair; + using ParamT = raft::distance::masked_l2_nn_params; + + ParamT params{reduction_op, reduction_op, true, true}; + + auto X_view = raft::make_device_matrix_view(X, n_rows, n_cols); + raft::distance::masked_l2_nn( + handle, + params, + X_view, + X_view, + x_norm.view(), + x_norm.view(), + adj.view(), + raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components), + kvp_view); - raft::print_device_vector("colors", colors, n_rows, std::cout); - raft::print_device_vector("colors_csr", colors_group_idxs.data_handle(), n_components + 1, std::cout); - auto adj = raft::make_device_matrix (handle, n_rows, n_components); - auto mask_op = [colors, n_components = raft::util::FastIntDiv(n_components)] __device__(value_idx idx) { - value_idx row = idx / n_components; - value_idx col = idx % n_components; - return colors[row] != col; - }; - raft::linalg::map_offset(handle, adj.view(), mask_op); - raft::print_device_vector("adj", adj.data_handle(), 30, std::cout); - auto kvp_view = raft::make_device_vector_view, value_idx>(kvp, n_rows); - using OutT = raft::KeyValuePair; - using ParamT = raft::distance::masked_l2_nn_params; - ParamT params{ - reduction_op, - reduction_op, - true, - true}; - auto X_view = raft::make_device_matrix_view(X, n_rows, n_cols); - raft::distance::masked_l2_nn(handle, - params, - X_view, - X_view, - x_norm.view(), - x_norm.view(), - adj.view(), - raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components), - kvp_view); - - RAFT_LOG_INFO("Done until masked l2 distance"); LookupColorOp extract_colors_op(colors); thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors, extract_colors_op); - - raft::print_device_vector("nn_colors", nn_colors, n_rows, std::cout); } /** @@ -413,22 +423,16 @@ void min_components_by_color(raft::sparse::COO& coo, * @param[in] n_cols number of cols in X */ template -void connect_components( - raft::device_resources const& handle, - raft::sparse::COO& out, - const value_t* X, - const value_idx* orig_colors, - size_t n_rows, - size_t n_cols, - red_op reduction_op, - raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded) +void connect_components(raft::device_resources const& handle, + raft::sparse::COO& out, + const value_t* X, + const value_idx* orig_colors, + size_t n_rows, + size_t n_cols, + red_op reduction_op) { auto stream = handle.get_stream(); - RAFT_EXPECTS(metric == raft::distance::DistanceType::L2SqrtExpanded, - "Fixing connectivities for an unconnected k-NN graph only " - "supports L2SqrtExpanded currently."); - rmm::device_uvector colors(n_rows, stream); raft::copy_async(colors.data(), orig_colors, n_rows, stream); @@ -439,17 +443,13 @@ void connect_components( thrust::counting_iterator arg_sort_iter(0); thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows, sort_plan.data()); - thrust::sort_by_key(handle.get_thrust_policy(), colors.data(), colors.data() + n_rows, sort_plan.data()); - + thrust::sort_by_key( + handle.get_thrust_policy(), colors.data(), colors.data() + n_rows, sort_plan.data()); + // Modify the reduction operation based on the sort plan. This is particularly needed for HDBSCAN reduction_op.gather(handle, sort_plan.data()); - batched_gather(handle, - const_cast(X), - sort_plan.data(), - n_rows, - n_cols, - n_cols); + batched_gather(handle, const_cast(X), sort_plan.data(), n_rows, n_cols, n_cols); /** * First compute 1-nn for all colors where the color of each data point @@ -460,13 +460,13 @@ void connect_components( rmm::device_uvector src_indices(n_rows, stream); perform_1nn(handle, - temp_inds_dists.data(), - nn_colors.data(), + temp_inds_dists.data(), + nn_colors.data(), colors.data(), - X, - n_rows, - n_cols, - reduction_op); + X, + n_rows, + n_cols, + reduction_op); /** * Sort data points by color (neighbors are not sorted) @@ -484,15 +484,10 @@ void connect_components( raft::sparse::op::compute_duplicates_mask( out_index.data(), colors.data(), nn_colors.data(), n_rows, stream); - raft::print_device_vector("colors", colors.data(), n_rows, std::cout); - raft::print_device_vector("nn_colors", nn_colors.data(), n_rows, std::cout); - raft::print_device_vector("out_index", out_index.data(), n_rows, std::cout); - thrust::exclusive_scan(handle.get_thrust_policy(), out_index.data(), out_index.data() + out_index.size(), out_index.data()); - raft::print_device_vector("out_index", out_index.data(), n_rows, std::cout); // compute final size value_idx size = 0; @@ -504,15 +499,15 @@ void connect_components( raft::sparse::COO min_edges(stream); min_edges.allocate(size, n_rows, n_rows, true, stream); - min_components_by_color( - min_edges, out_index.data(), src_indices.data(), sort_plan.data(), temp_inds_dists.data(), n_rows, stream); + min_components_by_color(min_edges, + out_index.data(), + src_indices.data(), + sort_plan.data(), + temp_inds_dists.data(), + n_rows, + stream); - batched_scatter(handle, - const_cast(X), - sort_plan.data(), - n_rows, - n_cols, - n_cols); + batched_scatter(handle, const_cast(X), sort_plan.data(), n_rows, n_cols, n_cols); reduction_op.scatter(handle, sort_plan.data()); /** * Symmetrize resulting edge list From 6dbd6a4d35d3cb34581621e7a86cf1cd9acba5e1 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Wed, 26 Apr 2023 22:55:28 -0700 Subject: [PATCH 08/53] change api --- cpp/include/raft/cluster/detail/mst.cuh | 2 +- .../raft/sparse/neighbors/connect_components.cuh | 6 +++--- .../sparse/neighbors/detail/connect_components.cuh | 11 +++++++++-- cpp/test/sparse/neighbors/connect_components.cu | 2 +- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh index 22fdc5a86c..0a84eb0582 100644 --- a/cpp/include/raft/cluster/detail/mst.cuh +++ b/cpp/include/raft/cluster/detail/mst.cuh @@ -81,7 +81,7 @@ void connect_knn_graph( raft::sparse::COO connected_edges(stream); raft::sparse::neighbors::connect_components( - handle, connected_edges, X, color, m, n, reduction_op); + handle, connected_edges, X, color, m, n, n, reduction_op); rmm::device_uvector indptr2(m + 1, stream); raft::sparse::convert::sorted_coo_to_csr( diff --git a/cpp/include/raft/sparse/neighbors/connect_components.cuh b/cpp/include/raft/sparse/neighbors/connect_components.cuh index 90343c1215..efc666b1b7 100644 --- a/cpp/include/raft/sparse/neighbors/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/connect_components.cuh @@ -70,10 +70,10 @@ void connect_components( const value_idx* orig_colors, size_t n_rows, size_t n_cols, - red_op reduction_op, - raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded) + size_t col_batch_size, + red_op reduction_op) { - detail::connect_components(handle, out, X, orig_colors, n_rows, n_cols, reduction_op, metric); + detail::connect_components(handle, out, X, orig_colors, n_rows, n_cols, col_batch_size, reduction_op); } }; // end namespace raft::sparse::neighbors \ No newline at end of file diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index cf34a70f9c..da44928969 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -17,6 +17,7 @@ #include +#include #include #include #include @@ -32,6 +33,8 @@ #include #include +#include + #include #include #include @@ -429,8 +432,12 @@ void connect_components(raft::device_resources const& handle, const value_idx* orig_colors, size_t n_rows, size_t n_cols, + size_t col_batch_size, red_op reduction_op) { + auto func_range = raft::common::nvtx::range{__func__}; + + RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= n_cols, "col_batch_size should be > 0 and <= n_cols"); auto stream = handle.get_stream(); rmm::device_uvector colors(n_rows, stream); @@ -449,7 +456,7 @@ void connect_components(raft::device_resources const& handle, // Modify the reduction operation based on the sort plan. This is particularly needed for HDBSCAN reduction_op.gather(handle, sort_plan.data()); - batched_gather(handle, const_cast(X), sort_plan.data(), n_rows, n_cols, n_cols); + batched_gather(handle, const_cast(X), sort_plan.data(), n_rows, n_cols, col_batch_size); /** * First compute 1-nn for all colors where the color of each data point @@ -507,7 +514,7 @@ void connect_components(raft::device_resources const& handle, n_rows, stream); - batched_scatter(handle, const_cast(X), sort_plan.data(), n_rows, n_cols, n_cols); + batched_scatter(handle, const_cast(X), sort_plan.data(), n_rows, n_cols, col_batch_size); reduction_op.scatter(handle, sort_plan.data()); /** * Symmetrize resulting edge list diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index b556eb637d..3e8a5ed7d0 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -107,7 +107,7 @@ class ConnectComponentsTest */ raft::linkage::FixConnectivitiesRedOp red_op(params.n_row); raft::linkage::connect_components( - handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, red_op); + handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, params.n_col, red_op); /** * Construct final edge list From 681da7a0651ae13e7fb404837cd0a3cbb6767f7c Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Wed, 3 May 2023 13:33:59 -0700 Subject: [PATCH 09/53] Benchmarking --- .../neighbors/detail/connect_components.cuh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index da44928969..ea80a21208 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -450,6 +450,8 @@ void connect_components(raft::device_resources const& handle, thrust::counting_iterator arg_sort_iter(0); thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows, sort_plan.data()); + uint32_t sort_start = curTimeMillis(); + thrust::sort_by_key( handle.get_thrust_policy(), colors.data(), colors.data() + n_rows, sort_plan.data()); @@ -458,6 +460,9 @@ void connect_components(raft::device_resources const& handle, batched_gather(handle, const_cast(X), sort_plan.data(), n_rows, n_cols, col_batch_size); + uint32_t sort_end = curTimeMillis(); + + RAFT_LOG_INFO("Time required to sort %zu", sort_end - sort_start); /** * First compute 1-nn for all colors where the color of each data point * is guaranteed to be != color of its nearest neighbor. @@ -466,6 +471,7 @@ void connect_components(raft::device_resources const& handle, rmm::device_uvector> temp_inds_dists(n_rows, stream); rmm::device_uvector src_indices(n_rows, stream); + uint32_t op_start = curTimeMillis(); perform_1nn(handle, temp_inds_dists.data(), nn_colors.data(), @@ -513,9 +519,19 @@ void connect_components(raft::device_resources const& handle, temp_inds_dists.data(), n_rows, stream); + uint32_t op_end = curTimeMillis(); + + RAFT_LOG_INFO("Time required for all operations between sort and unsort %zu", op_end - op_start); + + uint32_t unsort_start = curTimeMillis(); batched_scatter(handle, const_cast(X), sort_plan.data(), n_rows, n_cols, col_batch_size); reduction_op.scatter(handle, sort_plan.data()); + + uint32_t unsort_end = curTimeMillis(); + + RAFT_LOG_INFO("Time required to unsort %zu", unsort_end - unsort_start); + /** * Symmetrize resulting edge list */ From 81a3d60144d55d9433e76a012e990e3c8bd07263 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Wed, 3 May 2023 14:18:02 -0700 Subject: [PATCH 10/53] Remove nvtx --- cpp/include/raft/sparse/neighbors/detail/connect_components.cuh | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index ea80a21208..a94501bc5f 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -435,8 +435,6 @@ void connect_components(raft::device_resources const& handle, size_t col_batch_size, red_op reduction_op) { - auto func_range = raft::common::nvtx::range{__func__}; - RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= n_cols, "col_batch_size should be > 0 and <= n_cols"); auto stream = handle.get_stream(); From d7aec0bb4759fc9f85212903df99fb7e23058226 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 5 May 2023 12:48:29 -0700 Subject: [PATCH 11/53] bm --- .../raft/distance/detail/masked_nn.cuh | 9 ++++++ .../neighbors/detail/connect_components.cuh | 28 +++++++++++++------ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/cpp/include/raft/distance/detail/masked_nn.cuh b/cpp/include/raft/distance/detail/masked_nn.cuh index 1cf7188b06..9114f2bd04 100644 --- a/cpp/include/raft/distance/detail/masked_nn.cuh +++ b/cpp/include/raft/distance/detail/masked_nn.cuh @@ -16,6 +16,7 @@ #pragma once +#include "raft/core/logger-macros.hpp" #include #include @@ -298,6 +299,8 @@ void masked_l2_nn_impl(raft::device_resources const& handle, dim3 block(P::Nthreads); dim3 grid = launchConfigGenerator

(m, n, smemSize, kernel); + handle.sync_stream(stream); + uint32_t masked_nn_kernel_start = curTimeMillis(); kernel<<>>(out, x, y, @@ -316,8 +319,14 @@ void masked_l2_nn_impl(raft::device_resources const& handle, pairRedOp, core_lambda, fin_op); + handle.sync_stream(stream); + uint32_t masked_nn_kernel_end = curTimeMillis(); + RAFT_LOG_INFO("Time taken by masked nn kernel %zu", masked_nn_kernel_end - masked_nn_kernel_start); RAFT_CUDA_TRY(cudaGetLastError()); + size_t free, total; + cudaMemGetInfo(&free, &total); + RAFT_LOG_INFO("Peak memory usage during masked nn: Free memory (MB) %zu; Used memory (MB) %zu", free / (1024 * 1024), (total - free) / (1024 * 1024)); } } // namespace detail diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index a94501bc5f..8ff57c6235 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -15,6 +15,8 @@ */ #pragma once +#include "raft/core/logger-macros.hpp" +#include #include #include @@ -185,10 +187,13 @@ void batched_gather(raft::device_resources const& handle, auto exec_policy = handle.get_thrust_policy(); value_idx n_batches = raft::ceildiv((value_idx)n, (value_idx)batch_size); - + RAFT_LOG_INFO("n_batches %d", n_batches); + size_t free, total; + cudaMemGetInfo(&free, &total); + RAFT_LOG_INFO("Peak memory usage before batched gather: Free memory (MB) %zu; Used memory (MB) %zu", free / (1024 * 1024), (total - free) / (1024 * 1024)); for (value_idx bid = 0; bid < n_batches; bid++) { value_idx batch_offset = bid * batch_size; - value_idx cols_per_batch = min((value_idx)batch_size, (value_idx)n - bid * batch_offset); + value_idx cols_per_batch = min((value_idx)batch_size, (value_idx)n - batch_offset); auto scratch_space = raft::make_device_vector(handle, m * cols_per_batch); auto scatter_op = @@ -211,7 +216,11 @@ void batched_gather(raft::device_resources const& handle, }; auto counting = thrust::make_counting_iterator(0); thrust::for_each(exec_policy, counting, counting + m * batch_size, copy_op); - } + if (bid == n_batches - 1) { + cudaMemGetInfo(&free, &total); + RAFT_LOG_INFO("Peak memory usage during batched gather: Free memory (MB) %zu; Used memory (MB) %zu", free / (1024 * 1024), (total - free) / (1024 * 1024)); + } + } } template @@ -229,7 +238,7 @@ void batched_scatter(raft::device_resources const& handle, for (value_idx bid = 0; bid < n_batches; bid++) { value_idx batch_offset = bid * batch_size; - value_idx cols_per_batch = min((value_idx)batch_size, (value_idx)n - bid * batch_offset); + value_idx cols_per_batch = min((value_idx)batch_size, (value_idx)n - batch_offset); auto scratch_space = raft::make_device_vector(handle, m * cols_per_batch); auto scatter_op = @@ -307,6 +316,7 @@ void perform_1nn(raft::device_resources const& handle, ParamT params{reduction_op, reduction_op, true, true}; auto X_view = raft::make_device_matrix_view(X, n_rows, n_cols); + uint32_t masked_nn_kernel_start = curTimeMillis(); raft::distance::masked_l2_nn( handle, params, @@ -317,7 +327,8 @@ void perform_1nn(raft::device_resources const& handle, adj.view(), raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components), kvp_view); - + uint32_t masked_nn_kernel_end = curTimeMillis(); + RAFT_LOG_INFO("Time taken by masked_nn function (ms) %zu", masked_nn_kernel_end - masked_nn_kernel_start); LookupColorOp extract_colors_op(colors); thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors, extract_colors_op); } @@ -435,6 +446,7 @@ void connect_components(raft::device_resources const& handle, size_t col_batch_size, red_op reduction_op) { + RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= n_cols, "col_batch_size should be > 0 and <= n_cols"); auto stream = handle.get_stream(); @@ -460,7 +472,7 @@ void connect_components(raft::device_resources const& handle, uint32_t sort_end = curTimeMillis(); - RAFT_LOG_INFO("Time required to sort %zu", sort_end - sort_start); + RAFT_LOG_INFO("Time required to sort (ms) %zu", sort_end - sort_start); /** * First compute 1-nn for all colors where the color of each data point * is guaranteed to be != color of its nearest neighbor. @@ -519,7 +531,7 @@ void connect_components(raft::device_resources const& handle, stream); uint32_t op_end = curTimeMillis(); - RAFT_LOG_INFO("Time required for all operations between sort and unsort %zu", op_end - op_start); + RAFT_LOG_INFO("Time required for all operations between sort and unsort (ms) %zu", op_end - op_start); uint32_t unsort_start = curTimeMillis(); @@ -528,7 +540,7 @@ void connect_components(raft::device_resources const& handle, uint32_t unsort_end = curTimeMillis(); - RAFT_LOG_INFO("Time required to unsort %zu", unsort_end - unsort_start); + RAFT_LOG_INFO("Time required to unsort (ms) %zu", unsort_end - unsort_start); /** * Symmetrize resulting edge list From 8949881749d544d629292fb77bf1c62b0b9b68f0 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Wed, 10 May 2023 20:39:47 -0700 Subject: [PATCH 12/53] Row batch_size --- cpp/include/raft/cluster/detail/mst.cuh | 11 +- .../raft/distance/detail/masked_nn.cuh | 7 - cpp/include/raft/matrix/batched_rearrange.cuh | 87 ++++++++ .../raft/matrix/detail/batched_rearrange.cuh | 158 ++++++++++++++ .../sparse/neighbors/connect_components.cuh | 34 +-- .../neighbors/detail/connect_components.cuh | 203 ++++++------------ .../sparse/neighbors/connect_components.cu | 11 +- 7 files changed, 352 insertions(+), 159 deletions(-) create mode 100644 cpp/include/raft/matrix/batched_rearrange.cuh create mode 100644 cpp/include/raft/matrix/detail/batched_rearrange.cuh diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh index 0a84eb0582..3e6193dd9e 100644 --- a/cpp/include/raft/cluster/detail/mst.cuh +++ b/cpp/include/raft/cluster/detail/mst.cuh @@ -80,8 +80,15 @@ void connect_knn_graph( raft::sparse::COO connected_edges(stream); - raft::sparse::neighbors::connect_components( - handle, connected_edges, X, color, m, n, n, reduction_op); + raft::sparse::neighbors::connect_components(handle, + connected_edges, + X, + color, + m, + n, + reduction_op, + min(m, (size_t)4096), + min(n, (size_t)16)); rmm::device_uvector indptr2(m + 1, stream); raft::sparse::convert::sorted_coo_to_csr( diff --git a/cpp/include/raft/distance/detail/masked_nn.cuh b/cpp/include/raft/distance/detail/masked_nn.cuh index 9114f2bd04..27a2ce36fb 100644 --- a/cpp/include/raft/distance/detail/masked_nn.cuh +++ b/cpp/include/raft/distance/detail/masked_nn.cuh @@ -16,7 +16,6 @@ #pragma once -#include "raft/core/logger-macros.hpp" #include #include @@ -319,14 +318,8 @@ void masked_l2_nn_impl(raft::device_resources const& handle, pairRedOp, core_lambda, fin_op); - handle.sync_stream(stream); - uint32_t masked_nn_kernel_end = curTimeMillis(); - RAFT_LOG_INFO("Time taken by masked nn kernel %zu", masked_nn_kernel_end - masked_nn_kernel_start); RAFT_CUDA_TRY(cudaGetLastError()); - size_t free, total; - cudaMemGetInfo(&free, &total); - RAFT_LOG_INFO("Peak memory usage during masked nn: Free memory (MB) %zu; Used memory (MB) %zu", free / (1024 * 1024), (total - free) / (1024 * 1024)); } } // namespace detail diff --git a/cpp/include/raft/matrix/batched_rearrange.cuh b/cpp/include/raft/matrix/batched_rearrange.cuh new file mode 100644 index 0000000000..8644f63255 --- /dev/null +++ b/cpp/include/raft/matrix/batched_rearrange.cuh @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +namespace raft { +namespace matrix { + +/** + * In-place gather elements from a row-major source matrix into a destination range according to a map. The length of the map is equal to the number of rows. Batching is done on columns and an additional scratch space of shape n_rows * cols_batch_size is created. For each batch, chunks of columns from each row are copied into the appropriate location in the scratch space and copied back to the corresponding locations in the input matrix. + * @tparam value_idx + * @tparam value_t + * @param[in] handle raft handle + * @param[out] in input matrix (n_rows * n_cols) + * @param[in] map map containing the order in which rows are to be rearranged (n_rows) + * @param D Number of columns of the input/output matrices + * @param N Number of rows of the input matrix + * @param col_batch_size column batch size + */ +template +void batched_gather(raft::device_resources const& handle, + T* in, + IdxT* map, + size_t D, + size_t N, + size_t col_batch_size) +{ + RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= D, "col_batch_size should be > 0 and <= D"); + detail::batched_gather(handle, + in, + map, + D, + N, + col_batch_size); +} + +/** + * In-place scatter elements in a row-major matrix according to a + * map. The length of the map is equal to the number of rows. + * Batching is done on columns and an additional scratch space of + * shape n_rows * cols_batch_size is created. For each batch, chunks + * of columns from each row are copied into the appropriate location + * in the scratch space and copied back to the corresponding locations + * in the input matrix. * @tparam value_idx + * @tparam value_t + * @param[in] handle raft handle + * @param[out] in input matrix (n_rows * n_cols) + * @param[in] map map containing the destination index for each row (n_rows) + * @param D Number of columns of the input/output matrices + * @param N Number of rows of the input matrix + * @param col_batch_size column batch size + */ +template +void batched_scatter(raft::device_resources const& handle, + T* in, + IdxT* map, + size_t D, + size_t N, + size_t col_batch_size) +{ + RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= D, "col_batch_size should be > 0 and <= D"); + detail::batched_scatter(handle, + in, + map, + D, + N, + col_batch_size); +} + +}; // end namespace matrix +}; // end namespace raft \ No newline at end of file diff --git a/cpp/include/raft/matrix/detail/batched_rearrange.cuh b/cpp/include/raft/matrix/detail/batched_rearrange.cuh new file mode 100644 index 0000000000..a48ad49660 --- /dev/null +++ b/cpp/include/raft/matrix/detail/batched_rearrange.cuh @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +namespace raft { +namespace matrix { +namespace detail { + +/** + * In-place gather elements in a row-major matrix according to a + * map. The length of the map is equal to the number of rows. + * Batching is done on columns and an additional scratch space of + * shape n_rows * cols_batch_size is created. For each batch, chunks + * of columns from each row are copied into the appropriate location + * in the scratch space and copied back to the corresponding locations + * in the input matrix. + * @tparam value_idx + * @tparam value_t + * @param[in] handle raft handle + * @param[out] in input matrix (n_rows * n_cols) + * @param[in] map map containing the order in which rows are to be rearranged (n_rows) + * @param D Number of columns of the input/output matrices + * @param N Number of rows of the input matrix + * @param batch_size column batch size + */ +template +void batched_gather(raft::device_resources const& handle, + T* in, + IdxT* map, + size_t D, + size_t N, + size_t batch_size) +{ + auto exec_policy = handle.get_thrust_policy(); + size_t n_batches = raft::ceildiv(D, batch_size); + for (size_t bid = 0; bid < n_batches; bid++) { + size_t batch_offset = bid * batch_size; + size_t cols_per_batch = min(batch_size, D - (size_t)batch_offset); + auto scratch_space = raft::make_device_vector(handle, N * cols_per_batch); + + auto scatter_op = + [in, map, batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), D] __device__( + auto idx) { + IdxT row = idx / cols_per_batch; + IdxT col = idx % cols_per_batch; + return in[map[row] * D + batch_offset + col]; + }; + raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); + auto copy_op = [in, + map, + scratch_space = scratch_space.data_handle(), + batch_offset, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + D] __device__(auto idx) { + IdxT row = idx / cols_per_batch; + IdxT col = idx % cols_per_batch; + return in[row * D + batch_offset + col] = scratch_space[idx]; + }; + auto counting = thrust::make_counting_iterator(0); + thrust::for_each(exec_policy, counting, counting + N * batch_size, copy_op); + } +} + +/** + * In-place scatter elements in a row-major matrix according to a + * map. The length of the map is equal to the number of rows. + * Batching is done on columns and an additional scratch space of + * shape n_rows * cols_batch_size is created. For each batch, chunks + * of columns from each row are copied into the appropriate location + * in the scratch space and copied back to the corresponding locations + * in the input matrix. + * @tparam T + * @tparam IdxT + * @param[in] handle raft handle + * @param[out] in input matrix (n_rows * n_cols) + * @param[in] map map containing the destination index for each row (n_rows) + * @param D Number of columns of the input/output matrices + * @param N Number of rows of the input matrix + * @param batch_size column batch size + */ +template +void batched_scatter(raft::device_resources const& handle, + T* in, + IdxT* map, + size_t D, + size_t N, + size_t batch_size) +{ + auto stream = handle.get_stream(); + auto exec_policy = handle.get_thrust_policy(); + + size_t n_batches = raft::ceildiv(D, batch_size); + + for (size_t bid = 0; bid < n_batches; bid++) { + size_t batch_offset = bid * batch_size; + size_t cols_per_batch = min(batch_size, D - batch_offset); + auto scratch_space = raft::make_device_vector(handle, N * cols_per_batch); + + auto scatter_op = + [in, map, batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), D] __device__( + auto idx) { + IdxT row = idx / cols_per_batch; + IdxT col = idx % cols_per_batch; + return in[row * D + batch_offset + col]; + }; + raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); + auto copy_op = [in, + map, + scratch_space = scratch_space.data_handle(), + batch_offset, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + D] __device__(auto idx) { + IdxT row = idx / cols_per_batch; + IdxT col = idx % cols_per_batch; + in[map[row] * D + batch_offset + col] = scratch_space[idx]; + }; + auto counting = thrust::make_counting_iterator(0); + thrust::for_each(exec_policy, counting, counting + N * batch_size, copy_op); + } +} + +}; // end namespace detail +}; // end namespace matrix +}; // end namespace raft \ No newline at end of file diff --git a/cpp/include/raft/sparse/neighbors/connect_components.cuh b/cpp/include/raft/sparse/neighbors/connect_components.cuh index efc666b1b7..88a70026c3 100644 --- a/cpp/include/raft/sparse/neighbors/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/connect_components.cuh @@ -59,21 +59,31 @@ value_idx get_n_components(value_idx* colors, size_t n_rows, cudaStream_t stream * @param[in] orig_colors array containing component number for each row of X * @param[in] n_rows number of rows in X * @param[in] n_cols number of cols in X - * @param[in] reduction_op - * @param[in] metric + * @param[in] reduction_op reduction operation for computing nearest neighbors. The reduction + * operation must have `gather` and `scatter` functions defined. For single linkage clustering, + * these functions are no-ops. For HDBSCAN, they sort and 'unsort' the core distances based on color + * @param[in] row_batch_size the batch size for computing nearest neighbors. This parameter controls + * the number of samples for which the nearest neighbors are computed at once. Therefore, it affects + * the memory consumption mainly by reducing the size of the adjacency matrix for masked nearest + * neighbors computation + * @param[in] col_batch_size the input data is sorted and 'unsorted' based on color. An additional + * scratch space buffer of shape (n_rows, col_batch_size) is created for this. Usually, this + * parameter affects the memory consumption more drastically than the col_batch_size with a marginal + * increase in compute time as the col_batch_size is reduced */ template -void connect_components( - raft::device_resources const& handle, - raft::sparse::COO& out, - const value_t* X, - const value_idx* orig_colors, - size_t n_rows, - size_t n_cols, - size_t col_batch_size, - red_op reduction_op) +void connect_components(raft::device_resources const& handle, + raft::sparse::COO& out, + const value_t* X, + const value_idx* orig_colors, + size_t n_rows, + size_t n_cols, + red_op reduction_op, + size_t row_batch_size = 256, + size_t col_batch_size = 32) { - detail::connect_components(handle, out, X, orig_colors, n_rows, n_cols, col_batch_size, reduction_op); + detail::connect_components( + handle, out, X, orig_colors, n_rows, n_cols, reduction_op, row_batch_size, col_batch_size); } }; // end namespace raft::sparse::neighbors \ No newline at end of file diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 8ff57c6235..f75d2b79b8 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -15,15 +15,14 @@ */ #pragma once -#include "raft/core/logger-macros.hpp" #include #include -#include #include #include #include #include +#include #include #include #include @@ -175,100 +174,12 @@ struct LookupColorOp { } }; -template -void batched_gather(raft::device_resources const& handle, - value_t* X, - value_idx* map, - size_t m, - size_t n, - size_t batch_size) -{ - auto stream = handle.get_stream(); - auto exec_policy = handle.get_thrust_policy(); - - value_idx n_batches = raft::ceildiv((value_idx)n, (value_idx)batch_size); - RAFT_LOG_INFO("n_batches %d", n_batches); - size_t free, total; - cudaMemGetInfo(&free, &total); - RAFT_LOG_INFO("Peak memory usage before batched gather: Free memory (MB) %zu; Used memory (MB) %zu", free / (1024 * 1024), (total - free) / (1024 * 1024)); - for (value_idx bid = 0; bid < n_batches; bid++) { - value_idx batch_offset = bid * batch_size; - value_idx cols_per_batch = min((value_idx)batch_size, (value_idx)n - batch_offset); - auto scratch_space = raft::make_device_vector(handle, m * cols_per_batch); - - auto scatter_op = - [X, map, batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__( - auto idx) { - value_idx row = idx / cols_per_batch; - value_idx col = idx % cols_per_batch; - return X[map[row] * n + batch_offset + col]; - }; - raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); - auto copy_op = [X, - map, - scratch_space = scratch_space.data_handle(), - batch_offset, - cols_per_batch = raft::util::FastIntDiv(cols_per_batch), - n] __device__(auto idx) { - value_idx row = idx / cols_per_batch; - value_idx col = idx % cols_per_batch; - return X[row * n + batch_offset + col] = scratch_space[idx]; - }; - auto counting = thrust::make_counting_iterator(0); - thrust::for_each(exec_policy, counting, counting + m * batch_size, copy_op); - if (bid == n_batches - 1) { - cudaMemGetInfo(&free, &total); - RAFT_LOG_INFO("Peak memory usage during batched gather: Free memory (MB) %zu; Used memory (MB) %zu", free / (1024 * 1024), (total - free) / (1024 * 1024)); - } - } -} - -template -void batched_scatter(raft::device_resources const& handle, - value_t* X, - value_idx* map, - size_t m, - size_t n, - size_t batch_size) -{ - auto stream = handle.get_stream(); - auto exec_policy = handle.get_thrust_policy(); - - value_idx n_batches = raft::ceildiv((value_idx)n, (value_idx)batch_size); - - for (value_idx bid = 0; bid < n_batches; bid++) { - value_idx batch_offset = bid * batch_size; - value_idx cols_per_batch = min((value_idx)batch_size, (value_idx)n - batch_offset); - auto scratch_space = raft::make_device_vector(handle, m * cols_per_batch); - - auto scatter_op = - [X, map, batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__( - auto idx) { - value_idx row = idx / cols_per_batch; - value_idx col = idx % cols_per_batch; - return X[row * n + batch_offset + col]; - }; - raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); - auto copy_op = [X, - map, - scratch_space = scratch_space.data_handle(), - batch_offset, - cols_per_batch = raft::util::FastIntDiv(cols_per_batch), - n] __device__(auto idx) { - value_idx row = idx / cols_per_batch; - value_idx col = idx % cols_per_batch; - X[map[row] * n + batch_offset + col] = scratch_space[idx]; - }; - auto counting = thrust::make_counting_iterator(0); - thrust::for_each(exec_policy, counting, counting + m * batch_size, copy_op); - } -} - /** * Compute the cross-component 1-nearest neighbors for each row in X using * the given array of components * @tparam value_idx * @tparam value_t + * @param[in] handle raft handle * @param[out] kvp mapping of closest neighbor vertex and distance for each vertex in the given * array of components * @param[out] nn_colors components of nearest neighbors for each vertex @@ -276,7 +187,8 @@ void batched_scatter(raft::device_resources const& handle, * @param[in] X original dense data * @param[in] n_rows number of rows in original dense data * @param[in] n_cols number of columns in original dense data - * @param[in] stream cuda stream for which to order cuda operations + * @param[in] batch_size batch size for computing nearest neighbors + * @param[in] reduction_op reduction operation for computing nearest neighbors */ template void perform_1nn(raft::device_resources const& handle, @@ -286,28 +198,16 @@ void perform_1nn(raft::device_resources const& handle, const value_t* X, size_t n_rows, size_t n_cols, + size_t batch_size, red_op reduction_op) { auto stream = handle.get_stream(); - auto x_norm = raft::make_device_vector(handle, n_rows); - - raft::linalg::rowNorm( - x_norm.data_handle(), X, n_cols, n_rows, raft::linalg::L2Norm, true, stream); - value_idx n_components = get_n_components(colors, n_rows, stream); auto colors_group_idxs = raft::make_device_vector(handle, n_components + 1); raft::sparse::convert::sorted_coo_to_csr( colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); - auto adj = raft::make_device_matrix(handle, n_rows, n_components); - auto mask_op = [colors, - n_components = raft::util::FastIntDiv(n_components)] __device__(value_idx idx) { - value_idx row = idx / n_components; - value_idx col = idx % n_components; - return colors[row] != col; - }; - raft::linalg::map_offset(handle, adj.view(), mask_op); auto kvp_view = raft::make_device_vector_view, value_idx>(kvp, n_rows); using OutT = raft::KeyValuePair; @@ -315,20 +215,44 @@ void perform_1nn(raft::device_resources const& handle, ParamT params{reduction_op, reduction_op, true, true}; - auto X_view = raft::make_device_matrix_view(X, n_rows, n_cols); - uint32_t masked_nn_kernel_start = curTimeMillis(); - raft::distance::masked_l2_nn( - handle, - params, - X_view, - X_view, - x_norm.view(), - x_norm.view(), - adj.view(), - raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components), - kvp_view); - uint32_t masked_nn_kernel_end = curTimeMillis(); - RAFT_LOG_INFO("Time taken by masked_nn function (ms) %zu", masked_nn_kernel_end - masked_nn_kernel_start); + size_t n_batches = raft::ceildiv(n_rows, batch_size); + for (size_t bid = 0; bid < n_batches; bid++) { + size_t batch_offset = bid * batch_size; + size_t rows_per_batch = min(batch_size, n_rows - batch_offset); + auto x_norm = raft::make_device_vector(handle, rows_per_batch); + raft::linalg::rowNorm(x_norm.data_handle(), + X + batch_offset * n_cols, + n_cols, + rows_per_batch, + raft::linalg::L2Norm, + true, + stream); + + auto X_view = raft::make_device_matrix_view( + X + batch_offset * n_cols, rows_per_batch, n_cols); + + auto adj = raft::make_device_matrix(handle, rows_per_batch, n_components); + auto mask_op = [colors, + n_components = raft::util::FastIntDiv(n_components), + batch_offset] __device__(value_idx idx) { + value_idx row = idx / n_components; + value_idx col = idx % n_components; + return colors[batch_offset + row] != col; + }; + raft::linalg::map_offset(handle, adj.view(), mask_op); + + raft::distance::masked_l2_nn( + handle, + params, + X_view, + X_view, + x_norm.view(), + x_norm.view(), + adj.view(), + raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components), + kvp_view); + uint32_t masked_nn_kernel_end = curTimeMillis(); + } LookupColorOp extract_colors_op(colors); thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors, extract_colors_op); } @@ -432,9 +356,20 @@ void min_components_by_color(raft::sparse::COO& coo, * @param[out] out output edge list containing nearest cross-component * edges. * @param[in] X original (row-major) dense matrix for which knn graph should be constructed. - * @param[in] colors array containing component number for each row of X + * @param[in] orig_colors array containing component number for each row of X * @param[in] n_rows number of rows in X * @param[in] n_cols number of cols in X + * @param[in] reduction_op reduction operation for computing nearest neighbors. The reduction + * operation must have `gather` and `scatter` functions defined. For single linkage clustering, + * these functions are no-ops. For HDBSCAN, they sort and 'unsort' the core distances based on color + * @param[in] row_batch_size the batch size for computing nearest neighbors. This parameter controls + * the number of samples for which the nearest neighbors are computed at once. Therefore, it affects + * the memory consumption mainly by reducing the size of the adjacency matrix for masked nearest + * neighbors computation + * @param[in] col_batch_size the input data is sorted and 'unsorted' based on color. An additional + * scratch space buffer of shape (n_rows, col_batch_size) is created for this. Usually, this + * parameter affects the memory consumption more drastically than the col_batch_size with a marginal + * increase in compute time as the col_batch_size is reduced */ template void connect_components(raft::device_resources const& handle, @@ -443,11 +378,14 @@ void connect_components(raft::device_resources const& handle, const value_idx* orig_colors, size_t n_rows, size_t n_cols, - size_t col_batch_size, - red_op reduction_op) + red_op reduction_op, + size_t row_batch_size, + size_t col_batch_size) { - - RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= n_cols, "col_batch_size should be > 0 and <= n_cols"); + RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= n_cols, + "col_batch_size should be > 0 and <= n_cols"); + RAFT_EXPECTS(0 < row_batch_size && row_batch_size <= n_rows, + "row_batch_size should be > 0 and <= n_rows"); auto stream = handle.get_stream(); rmm::device_uvector colors(n_rows, stream); @@ -468,11 +406,11 @@ void connect_components(raft::device_resources const& handle, // Modify the reduction operation based on the sort plan. This is particularly needed for HDBSCAN reduction_op.gather(handle, sort_plan.data()); - batched_gather(handle, const_cast(X), sort_plan.data(), n_rows, n_cols, col_batch_size); + raft::matrix::batched_gather( + handle, const_cast(X), sort_plan.data(), n_rows, n_cols, col_batch_size); uint32_t sort_end = curTimeMillis(); - RAFT_LOG_INFO("Time required to sort (ms) %zu", sort_end - sort_start); /** * First compute 1-nn for all colors where the color of each data point * is guaranteed to be != color of its nearest neighbor. @@ -489,6 +427,7 @@ void connect_components(raft::device_resources const& handle, X, n_rows, n_cols, + n_rows, reduction_op); /** @@ -529,19 +468,11 @@ void connect_components(raft::device_resources const& handle, temp_inds_dists.data(), n_rows, stream); - uint32_t op_end = curTimeMillis(); - - RAFT_LOG_INFO("Time required for all operations between sort and unsort (ms) %zu", op_end - op_start); - uint32_t unsort_start = curTimeMillis(); - - batched_scatter(handle, const_cast(X), sort_plan.data(), n_rows, n_cols, col_batch_size); + raft::matrix::batched_scatter( + handle, const_cast(X), sort_plan.data(), n_rows, n_cols, col_batch_size); reduction_op.scatter(handle, sort_plan.data()); - uint32_t unsort_end = curTimeMillis(); - - RAFT_LOG_INFO("Time required to unsort (ms) %zu", unsort_end - unsort_start); - /** * Symmetrize resulting edge list */ diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index 7b97f4b3a0..77717a89bb 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -115,8 +115,15 @@ class ConnectComponentsTest * 3. connect_components to fix connectivities */ raft::linkage::FixConnectivitiesRedOp red_op(params.n_row); - raft::linkage::connect_components( - handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, params.n_col, red_op); + raft::linkage::connect_components(handle, + out_edges, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + params.n_row, + params.n_col); /** * Construct final edge list From 9ecd7825fe63570e9513729f7fb56cdcd98012f4 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 12 May 2023 13:17:06 -0700 Subject: [PATCH 13/53] Changes after PR Reviews --- .../raft/distance/detail/masked_nn.cuh | 2 - cpp/include/raft/matrix/batched_rearrange.cuh | 79 ++++++++++------- .../sparse/neighbors/connect_components.cuh | 4 +- .../neighbors/detail/connect_components.cuh | 85 ++++++++++--------- .../sparse/neighbors/detail/knn_graph.cuh | 1 - 5 files changed, 95 insertions(+), 76 deletions(-) diff --git a/cpp/include/raft/distance/detail/masked_nn.cuh b/cpp/include/raft/distance/detail/masked_nn.cuh index 27a2ce36fb..1cf7188b06 100644 --- a/cpp/include/raft/distance/detail/masked_nn.cuh +++ b/cpp/include/raft/distance/detail/masked_nn.cuh @@ -298,8 +298,6 @@ void masked_l2_nn_impl(raft::device_resources const& handle, dim3 block(P::Nthreads); dim3 grid = launchConfigGenerator

(m, n, smemSize, kernel); - handle.sync_stream(stream); - uint32_t masked_nn_kernel_start = curTimeMillis(); kernel<<>>(out, x, y, diff --git a/cpp/include/raft/matrix/batched_rearrange.cuh b/cpp/include/raft/matrix/batched_rearrange.cuh index 8644f63255..ba1da50b6b 100644 --- a/cpp/include/raft/matrix/batched_rearrange.cuh +++ b/cpp/include/raft/matrix/batched_rearrange.cuh @@ -15,73 +15,86 @@ */ #pragma once -#include +#include #include +#include #include namespace raft { namespace matrix { /** - * In-place gather elements from a row-major source matrix into a destination range according to a map. The length of the map is equal to the number of rows. Batching is done on columns and an additional scratch space of shape n_rows * cols_batch_size is created. For each batch, chunks of columns from each row are copied into the appropriate location in the scratch space and copied back to the corresponding locations in the input matrix. + * @brief In-place gather elements in a row-major matrix according to a + * map. The length of the map is equal to the number of rows. + * Batching is done on columns and an additional scratch space of + * shape n_rows * cols_batch_size is created. For each batch, chunks + * of columns from each row are copied into the appropriate location + * in the scratch space and copied back to the corresponding locations + * in the input matrix + * * @tparam value_idx * @tparam value_t + * * @param[in] handle raft handle - * @param[out] in input matrix (n_rows * n_cols) + * @param[inout] in input matrix (n_rows * n_cols) * @param[in] map map containing the order in which rows are to be rearranged (n_rows) - * @param D Number of columns of the input/output matrices - * @param N Number of rows of the input matrix - * @param col_batch_size column batch size + * @param[in] col_batch_size column batch size */ template void batched_gather(raft::device_resources const& handle, - T* in, - IdxT* map, - size_t D, - size_t N, + raft::device_matrix_view in, + raft::device_vector_view map, size_t col_batch_size) { - RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= D, "col_batch_size should be > 0 and <= D"); + IdxT m = in.extent(0); + IdxT n = in.extent(1); + IdxT map_len = map.extent(0); + RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= (size_t)n, "col_batch_size should be > 0 and <= n"); + RAFT_EXPECTS(map_len == m, "size of map should be equal to the number of rows in input matrix"); + detail::batched_gather(handle, - in, - map, - D, - N, + in.data_handle(), + map.data_handle(), + n, + m, col_batch_size); } /** - * In-place scatter elements in a row-major matrix according to a + * @brief In-place scatter elements in a row-major matrix according to a * map. The length of the map is equal to the number of rows. * Batching is done on columns and an additional scratch space of * shape n_rows * cols_batch_size is created. For each batch, chunks * of columns from each row are copied into the appropriate location * in the scratch space and copied back to the corresponding locations - * in the input matrix. * @tparam value_idx + * in the input matrix + * + * @tparam value_idx * @tparam value_t + * * @param[in] handle raft handle - * @param[out] in input matrix (n_rows * n_cols) - * @param[in] map map containing the destination index for each row (n_rows) - * @param D Number of columns of the input/output matrices - * @param N Number of rows of the input matrix - * @param col_batch_size column batch size + * @param[inout] in input matrix (n_rows * n_cols) + * @param[in] map map containing destination index of each row (n_rows) + * @param[in] col_batch_size column batch size */ template void batched_scatter(raft::device_resources const& handle, - T* in, - IdxT* map, - size_t D, - size_t N, + raft::device_matrix_view in, + raft::device_vector_view map, size_t col_batch_size) { - RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= D, "col_batch_size should be > 0 and <= D"); + IdxT m = in.extent(0); + IdxT n = in.extent(1); + IdxT map_len = map.extent(0); + RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= (size_t)n, "col_batch_size should be > 0 and <= n"); + RAFT_EXPECTS(map_len == m, "size of map should be equal to the number of rows in input matrix"); + detail::batched_scatter(handle, - in, - map, - D, - N, - col_batch_size); + in.data_handle(), + map.data_handle(), + n, + m, + col_batch_size); } - }; // end namespace matrix }; // end namespace raft \ No newline at end of file diff --git a/cpp/include/raft/sparse/neighbors/connect_components.cuh b/cpp/include/raft/sparse/neighbors/connect_components.cuh index 88a70026c3..e3d7f6c72a 100644 --- a/cpp/include/raft/sparse/neighbors/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/connect_components.cuh @@ -79,8 +79,8 @@ void connect_components(raft::device_resources const& handle, size_t n_rows, size_t n_cols, red_op reduction_op, - size_t row_batch_size = 256, - size_t col_batch_size = 32) + size_t row_batch_size = 0, + size_t col_batch_size = 0) { detail::connect_components( handle, out, X, orig_colors, n_rows, n_cols, reduction_op, row_batch_size, col_batch_size); diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index f75d2b79b8..00d6bf53d8 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -15,6 +15,9 @@ */ #pragma once +#include "raft/core/device_mdarray.hpp" +#include "raft/core/device_mdspan.hpp" +#include "raft/linalg/map.cuh" #include #include @@ -52,9 +55,11 @@ namespace raft::sparse::neighbors::detail { /** - * Functor with reduction ops for performing fused 1-nn - * computation and guaranteeing only cross-component - * neighbors are considered. + * Functor with reduction ops for performing masked 1-nn + * computation. this change introduces a breaking change to + * the public API because colors are no longer a part of this + * op. The connect_components function internally ensures that + * only cross-component nearest neighbors are found. * @tparam value_idx * @tparam value_t */ @@ -208,8 +213,18 @@ void perform_1nn(raft::device_resources const& handle, raft::sparse::convert::sorted_coo_to_csr( colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); + auto x_norm= raft::make_device_vector(handle, (value_idx)n_rows); + raft::linalg::rowNorm(x_norm.data_handle(), + X, + n_cols, + n_rows, + raft::linalg::L2Norm, + true, + stream); auto kvp_view = raft::make_device_vector_view, value_idx>(kvp, n_rows); + + auto adj = raft::make_device_matrix(handle, batch_size, n_components); using OutT = raft::KeyValuePair; using ParamT = raft::distance::masked_l2_nn_params; @@ -219,19 +234,11 @@ void perform_1nn(raft::device_resources const& handle, for (size_t bid = 0; bid < n_batches; bid++) { size_t batch_offset = bid * batch_size; size_t rows_per_batch = min(batch_size, n_rows - batch_offset); - auto x_norm = raft::make_device_vector(handle, rows_per_batch); - raft::linalg::rowNorm(x_norm.data_handle(), - X + batch_offset * n_cols, - n_cols, - rows_per_batch, - raft::linalg::L2Norm, - true, - stream); auto X_view = raft::make_device_matrix_view( X + batch_offset * n_cols, rows_per_batch, n_cols); - auto adj = raft::make_device_matrix(handle, rows_per_batch, n_components); + auto x_norm_view = raft::make_device_vector_view(x_norm.data_handle() + batch_offset, rows_per_batch); auto mask_op = [colors, n_components = raft::util::FastIntDiv(n_components), batch_offset] __device__(value_idx idx) { @@ -239,19 +246,19 @@ void perform_1nn(raft::device_resources const& handle, value_idx col = idx % n_components; return colors[batch_offset + row] != col; }; - raft::linalg::map_offset(handle, adj.view(), mask_op); + auto adj_view = raft::make_device_matrix_view(adj.data_handle(), rows_per_batch, n_components); + raft::linalg::map_offset(handle, adj_view, mask_op); raft::distance::masked_l2_nn( handle, params, X_view, X_view, - x_norm.view(), - x_norm.view(), - adj.view(), + x_norm_view, + x_norm_view, + adj_view, raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components), kvp_view); - uint32_t masked_nn_kernel_end = curTimeMillis(); } LookupColorOp extract_colors_op(colors); thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors, extract_colors_op); @@ -365,11 +372,12 @@ void min_components_by_color(raft::sparse::COO& coo, * @param[in] row_batch_size the batch size for computing nearest neighbors. This parameter controls * the number of samples for which the nearest neighbors are computed at once. Therefore, it affects * the memory consumption mainly by reducing the size of the adjacency matrix for masked nearest - * neighbors computation + * neighbors computation. default 0 indicates that no batching is done * @param[in] col_batch_size the input data is sorted and 'unsorted' based on color. An additional * scratch space buffer of shape (n_rows, col_batch_size) is created for this. Usually, this * parameter affects the memory consumption more drastically than the col_batch_size with a marginal - * increase in compute time as the col_batch_size is reduced + * increase in compute time as the col_batch_size is reduced. default 0 indicates that no batching is + * done */ template void connect_components(raft::device_resources const& handle, @@ -382,10 +390,16 @@ void connect_components(raft::device_resources const& handle, size_t row_batch_size, size_t col_batch_size) { - RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= n_cols, - "col_batch_size should be > 0 and <= n_cols"); - RAFT_EXPECTS(0 < row_batch_size && row_batch_size <= n_rows, - "row_batch_size should be > 0 and <= n_rows"); + RAFT_EXPECTS(col_batch_size <= n_cols, + "col_batch_size should be >= 0 and <= n_cols"); + RAFT_EXPECTS(row_batch_size <= n_rows, + "row_batch_size should be >= 0 and <= n_rows"); + if (row_batch_size == 0) { + row_batch_size = n_rows; + } + if (col_batch_size == 0) { + col_batch_size = n_cols; + } auto stream = handle.get_stream(); rmm::device_uvector colors(n_rows, stream); @@ -394,22 +408,18 @@ void connect_components(raft::device_resources const& handle, // Normalize colors so they are drawn from a monotonically increasing set raft::label::make_monotonic(colors.data(), colors.data(), n_rows, stream, true); - rmm::device_uvector sort_plan(n_rows, stream); - thrust::counting_iterator arg_sort_iter(0); - thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows, sort_plan.data()); - - uint32_t sort_start = curTimeMillis(); + auto sort_plan = raft::make_device_vector(handle, (value_idx)n_rows); + raft::linalg::map_offset(handle, sort_plan.view(), [] __device__(value_idx idx) {return idx;}); thrust::sort_by_key( - handle.get_thrust_policy(), colors.data(), colors.data() + n_rows, sort_plan.data()); + handle.get_thrust_policy(), colors.data(), colors.data() + n_rows, sort_plan.data_handle()); // Modify the reduction operation based on the sort plan. This is particularly needed for HDBSCAN - reduction_op.gather(handle, sort_plan.data()); + reduction_op.gather(handle, sort_plan.data_handle()); + auto X_mutable_view = raft::make_device_matrix_view(const_cast(X), n_rows, n_cols); raft::matrix::batched_gather( - handle, const_cast(X), sort_plan.data(), n_rows, n_cols, col_batch_size); - - uint32_t sort_end = curTimeMillis(); + handle, X_mutable_view, sort_plan.view(), col_batch_size); /** * First compute 1-nn for all colors where the color of each data point @@ -419,7 +429,6 @@ void connect_components(raft::device_resources const& handle, rmm::device_uvector> temp_inds_dists(n_rows, stream); rmm::device_uvector src_indices(n_rows, stream); - uint32_t op_start = curTimeMillis(); perform_1nn(handle, temp_inds_dists.data(), nn_colors.data(), @@ -427,7 +436,7 @@ void connect_components(raft::device_resources const& handle, X, n_rows, n_cols, - n_rows, + row_batch_size, reduction_op); /** @@ -464,14 +473,14 @@ void connect_components(raft::device_resources const& handle, min_components_by_color(min_edges, out_index.data(), src_indices.data(), - sort_plan.data(), + sort_plan.data_handle(), temp_inds_dists.data(), n_rows, stream); raft::matrix::batched_scatter( - handle, const_cast(X), sort_plan.data(), n_rows, n_cols, col_batch_size); - reduction_op.scatter(handle, sort_plan.data()); + handle, X_mutable_view, sort_plan.view(), col_batch_size); + reduction_op.scatter(handle, sort_plan.data_handle()); /** * Symmetrize resulting edge list diff --git a/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh b/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh index d53f2f8df3..8bf8811446 100644 --- a/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/knn_graph.cuh @@ -125,7 +125,6 @@ void knn_graph(raft::device_resources const& handle, // pass value_idx through to knn. rmm::device_uvector int64_indices(nnz, stream); - uint32_t knn_start = curTimeMillis(); raft::spatial::knn::brute_force_knn(handle, inputs, sizes, From 3b99d3548a653767acf6502c57a61f34e481dedf Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Tue, 16 May 2023 10:45:01 -0700 Subject: [PATCH 14/53] Some updates after new PR Reviews --- .../raft/matrix/detail/batched_rearrange.cuh | 158 ------------------ cpp/include/raft/matrix/detail/gather.cuh | 62 ++++++- cpp/include/raft/matrix/detail/scatter.cuh | 91 ++++++++++ cpp/include/raft/matrix/gather.cuh | 38 +++++ cpp/include/raft/matrix/scatter.cuh | 62 +++++++ .../sparse/neighbors/connect_components.cuh | 2 +- .../neighbors/detail/connect_components.cuh | 26 +-- 7 files changed, 268 insertions(+), 171 deletions(-) delete mode 100644 cpp/include/raft/matrix/detail/batched_rearrange.cuh create mode 100644 cpp/include/raft/matrix/detail/scatter.cuh create mode 100644 cpp/include/raft/matrix/scatter.cuh diff --git a/cpp/include/raft/matrix/detail/batched_rearrange.cuh b/cpp/include/raft/matrix/detail/batched_rearrange.cuh deleted file mode 100644 index a48ad49660..0000000000 --- a/cpp/include/raft/matrix/detail/batched_rearrange.cuh +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -namespace raft { -namespace matrix { -namespace detail { - -/** - * In-place gather elements in a row-major matrix according to a - * map. The length of the map is equal to the number of rows. - * Batching is done on columns and an additional scratch space of - * shape n_rows * cols_batch_size is created. For each batch, chunks - * of columns from each row are copied into the appropriate location - * in the scratch space and copied back to the corresponding locations - * in the input matrix. - * @tparam value_idx - * @tparam value_t - * @param[in] handle raft handle - * @param[out] in input matrix (n_rows * n_cols) - * @param[in] map map containing the order in which rows are to be rearranged (n_rows) - * @param D Number of columns of the input/output matrices - * @param N Number of rows of the input matrix - * @param batch_size column batch size - */ -template -void batched_gather(raft::device_resources const& handle, - T* in, - IdxT* map, - size_t D, - size_t N, - size_t batch_size) -{ - auto exec_policy = handle.get_thrust_policy(); - size_t n_batches = raft::ceildiv(D, batch_size); - for (size_t bid = 0; bid < n_batches; bid++) { - size_t batch_offset = bid * batch_size; - size_t cols_per_batch = min(batch_size, D - (size_t)batch_offset); - auto scratch_space = raft::make_device_vector(handle, N * cols_per_batch); - - auto scatter_op = - [in, map, batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), D] __device__( - auto idx) { - IdxT row = idx / cols_per_batch; - IdxT col = idx % cols_per_batch; - return in[map[row] * D + batch_offset + col]; - }; - raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); - auto copy_op = [in, - map, - scratch_space = scratch_space.data_handle(), - batch_offset, - cols_per_batch = raft::util::FastIntDiv(cols_per_batch), - D] __device__(auto idx) { - IdxT row = idx / cols_per_batch; - IdxT col = idx % cols_per_batch; - return in[row * D + batch_offset + col] = scratch_space[idx]; - }; - auto counting = thrust::make_counting_iterator(0); - thrust::for_each(exec_policy, counting, counting + N * batch_size, copy_op); - } -} - -/** - * In-place scatter elements in a row-major matrix according to a - * map. The length of the map is equal to the number of rows. - * Batching is done on columns and an additional scratch space of - * shape n_rows * cols_batch_size is created. For each batch, chunks - * of columns from each row are copied into the appropriate location - * in the scratch space and copied back to the corresponding locations - * in the input matrix. - * @tparam T - * @tparam IdxT - * @param[in] handle raft handle - * @param[out] in input matrix (n_rows * n_cols) - * @param[in] map map containing the destination index for each row (n_rows) - * @param D Number of columns of the input/output matrices - * @param N Number of rows of the input matrix - * @param batch_size column batch size - */ -template -void batched_scatter(raft::device_resources const& handle, - T* in, - IdxT* map, - size_t D, - size_t N, - size_t batch_size) -{ - auto stream = handle.get_stream(); - auto exec_policy = handle.get_thrust_policy(); - - size_t n_batches = raft::ceildiv(D, batch_size); - - for (size_t bid = 0; bid < n_batches; bid++) { - size_t batch_offset = bid * batch_size; - size_t cols_per_batch = min(batch_size, D - batch_offset); - auto scratch_space = raft::make_device_vector(handle, N * cols_per_batch); - - auto scatter_op = - [in, map, batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), D] __device__( - auto idx) { - IdxT row = idx / cols_per_batch; - IdxT col = idx % cols_per_batch; - return in[row * D + batch_offset + col]; - }; - raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); - auto copy_op = [in, - map, - scratch_space = scratch_space.data_handle(), - batch_offset, - cols_per_batch = raft::util::FastIntDiv(cols_per_batch), - D] __device__(auto idx) { - IdxT row = idx / cols_per_batch; - IdxT col = idx % cols_per_batch; - in[map[row] * D + batch_offset + col] = scratch_space[idx]; - }; - auto counting = thrust::make_counting_iterator(0); - thrust::for_each(exec_policy, counting, counting + N * batch_size, copy_op); - } -} - -}; // end namespace detail -}; // end namespace matrix -}; // end namespace raft \ No newline at end of file diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 7bd30e5bc6..783269f86b 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -13,11 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #pragma once #include #include +#include +#include +#include +#include +#include +#include namespace raft { namespace matrix { @@ -343,6 +348,61 @@ void gather_if(const InputIteratorT in, typedef typename std::iterator_traits::value_type MapValueT; gatherImpl(in, D, N, map, stencil, map_length, out, pred_op, transform_op, stream); } + +/** + * In-place gather elements in a row-major matrix according to a + * map. The length of the map is equal to the number of rows. + * Batching is done on columns and an additional scratch space of + * shape n_rows * cols_batch_size is created. For each batch, chunks + * of columns from each row are copied into the appropriate location + * in the scratch space and copied back to the corresponding locations + * in the input matrix. + * @tparam value_idx + * @tparam value_t + * @param[in] handle raft handle + * @param[inout] in input matrix (n_rows * n_cols) + * @param[in] map map containing the order in which rows are to be rearranged (n_rows) + * @param batch_size column batch size + */ +template +void gather(raft::device_resources const& handle, + raft::device_matrix_view in, + raft::device_vector_view map, + IndexT batch_size) +{ + IndexT m = in.extent(0); + IndexT n = in.extent(1); + + auto exec_policy = handle.get_thrust_policy(); + IndexT n_batches = raft::ceildiv(n, batch_size); + for (IndexT bid = 0; bid < n_batches; bid++) { + IndexT batch_offset = bid * batch_size; + IndexT cols_per_batch = min(batch_size, n - batch_offset); + auto scratch_space = raft::make_device_vector(handle, n * cols_per_batch); + + auto scatter_op = + [in = in.data_handle(), map = map.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__( + auto idx) { + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; + return in[map[row] * n + batch_offset + col]; + }; + raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); + auto copy_op = [in = in.data_handle(), + map = map.data_handle(), + scratch_space = scratch_space.data_handle(), + batch_offset, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + n] __device__(auto idx) { + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; + return in[row * n + batch_offset + col] = scratch_space[idx]; + }; + auto counting = thrust::make_counting_iterator(0); + thrust::for_each(exec_policy, counting, counting + n * batch_size, copy_op); + } +} + } // namespace detail } // namespace matrix } // namespace raft diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter.cuh new file mode 100644 index 0000000000..96cd9c5321 --- /dev/null +++ b/cpp/include/raft/matrix/detail/scatter.cuh @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace matrix { +namespace detail { + +/** + * In-place scatter elements in a row-major matrix according to a + * map. The length of the map is equal to the number of rows. + * Batching is done on columns and an additional scratch space of + * shape n_rows * cols_batch_size is created. For each batch, chunks + * of columns from each row are copied into the appropriate location + * in the scratch space and copied back to the corresponding locations + * in the input matrix. + * @tparam InputIteratorT + * @tparam IndexT + * + * @param[in] handle raft handle + * @param[inout] in input matrix (n_rows * n_cols) + * @param[in] map map containing the destination index for each row (n_rows) + * @param[in] batch_size column batch size + */ +template +void scatter(raft::device_resources const& handle, + raft::device_matrix_view in, + raft::device_vector_view map, + IndexT batch_size) +{ + IndexT m = in.extent(0); + IndexT n = in.extent(1); + + auto stream = handle.get_stream(); + auto exec_policy = handle.get_thrust_policy(); + + IndexT n_batches = raft::ceildiv(n, batch_size); + + for (IndexT bid = 0; bid < n_batches; bid++) { + IndexT batch_offset = bid * batch_size; + IndexT cols_per_batch = min(batch_size, n - batch_offset); + auto scratch_space = raft::make_device_vector(handle, m * cols_per_batch); + + auto scatter_op = + [in = in.data_handle(), map = map.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__( + auto idx) { + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; + return in[row * n + batch_offset + col]; + }; + raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); + auto copy_op = [in = in.data_handle(), + map = map.data_handle(), + scratch_space = scratch_space.data_handle(), + batch_offset, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + n] __device__(auto idx) { + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; + in[map[row] * n + batch_offset + col] = scratch_space[idx]; + }; + auto counting = thrust::make_counting_iterator(0); + thrust::for_each(exec_policy, counting, counting + m * batch_size, copy_op); + } +} + +} // end namespace detail +} // end namespace matrix +} // end namespace raft \ No newline at end of file diff --git a/cpp/include/raft/matrix/gather.cuh b/cpp/include/raft/matrix/gather.cuh index 7710789bfe..651dc601ac 100644 --- a/cpp/include/raft/matrix/gather.cuh +++ b/cpp/include/raft/matrix/gather.cuh @@ -288,6 +288,44 @@ void gather_if(const raft::device_resources& handle, handle.get_stream()); } +/** + * @brief In-place gather elements in a row-major matrix according to a + * map. The length of the map is equal to the number of rows. + * Batching is done on columns and an additional scratch space of + * shape n_rows * cols_batch_size is created. For each batch, chunks + * of columns from each row are copied into the appropriate location + * in the scratch space and copied back to the corresponding locations + * in the input matrix + * + * @tparam matrix_t + * @tparam map_t + * @tparam idx_t + * + * @param[in] handle raft handle + * @param[inout] in input matrix (n_rows * n_cols) + * @param[in] map map containing the order in which rows are to be rearranged (n_rows) + * @param[in] col_batch_size column batch size + */ +template +void gather(raft::device_resources const& handle, + raft::device_matrix_view in, + raft::device_vector_view map, + idx_t col_batch_size) +{ + idx_t m = in.extent(0); + idx_t n = in.extent(1); + idx_t map_len = map.extent(0); + RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= n, "col_batch_size should be > 0 and <= n"); + RAFT_EXPECTS(map_len == m, "size of map should be equal to the number of rows in input matrix"); + + detail::gather(handle, + in, + map, + col_batch_size); +} + /** @} */ // end of group matrix_gather } // namespace raft::matrix diff --git a/cpp/include/raft/matrix/scatter.cuh b/cpp/include/raft/matrix/scatter.cuh new file mode 100644 index 0000000000..37e0e38af2 --- /dev/null +++ b/cpp/include/raft/matrix/scatter.cuh @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace raft::matrix { +/** + * @brief In-place scatter elements in a row-major matrix according to a + * map. The length of the map is equal to the number of rows. + * Batching is done on columns and an additional scratch space of + * shape n_rows * cols_batch_size is created. For each batch, chunks + * of columns from each row are copied into the appropriate location + * in the scratch space and copied back to the corresponding locations + * in the input matrix. + * + * @tparam matrix_t + * @tparam map_t + * @tparam idx_t + * + * @param[in] handle raft handle + * @param[inout] in input matrix (n_rows * n_cols) + * @param[in] map map containing the order in which rows are to be rearranged (n_rows) + * @param[in] col_batch_size column batch size + */ +template +void scatter(raft::device_resources const& handle, + raft::device_matrix_view in, + raft::device_vector_view map, + idx_t col_batch_size) +{ + idx_t m = in.extent(0); + idx_t n = in.extent(1); + idx_t map_len = map.extent(0); + RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= n, "col_batch_size should be > 0 and <= n"); + RAFT_EXPECTS(map_len == m, "size of map should be equal to the number of rows in input matrix"); + + detail::scatter(handle, + in, + map, + col_batch_size); +} + +} \ No newline at end of file diff --git a/cpp/include/raft/sparse/neighbors/connect_components.cuh b/cpp/include/raft/sparse/neighbors/connect_components.cuh index e3d7f6c72a..9347ea448d 100644 --- a/cpp/include/raft/sparse/neighbors/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/connect_components.cuh @@ -68,7 +68,7 @@ value_idx get_n_components(value_idx* colors, size_t n_rows, cudaStream_t stream * neighbors computation * @param[in] col_batch_size the input data is sorted and 'unsorted' based on color. An additional * scratch space buffer of shape (n_rows, col_batch_size) is created for this. Usually, this - * parameter affects the memory consumption more drastically than the col_batch_size with a marginal + * parameter affects the memory consumption more drastically than the row_batch_size with a marginal * increase in compute time as the col_batch_size is reduced */ template diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 00d6bf53d8..75ba9bdc94 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -25,7 +25,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -37,7 +38,6 @@ #include #include -#include #include #include @@ -78,9 +78,7 @@ struct FixConnectivitiesRedOp { } } - DI KVP - - operator()(value_idx rit, const KVP& a, const KVP& b) + DI KVP operator()(value_idx rit, const KVP& a, const KVP& b) { if (rit < m && a.value < b.value) { return a; @@ -209,6 +207,11 @@ void perform_1nn(raft::device_resources const& handle, auto stream = handle.get_stream(); value_idx n_components = get_n_components(colors, n_rows, stream); + + // colors_group_idxs is an array containing the *end* indices of each color + // component in colors. That is, the value of colors_group_idxs[j] indicates + // the start of color j + 1, i.e., it is the inclusive scan of the sizes of + // the color components. auto colors_group_idxs = raft::make_device_vector(handle, n_components + 1); raft::sparse::convert::sorted_coo_to_csr( colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); @@ -403,10 +406,10 @@ void connect_components(raft::device_resources const& handle, auto stream = handle.get_stream(); rmm::device_uvector colors(n_rows, stream); - raft::copy_async(colors.data(), orig_colors, n_rows, stream); // Normalize colors so they are drawn from a monotonically increasing set - raft::label::make_monotonic(colors.data(), colors.data(), n_rows, stream, true); + bool zero_based = true; + raft::label::make_monotonic(colors.data(), const_cast(orig_colors), n_rows, stream, zero_based); auto sort_plan = raft::make_device_vector(handle, (value_idx)n_rows); raft::linalg::map_offset(handle, sort_plan.view(), [] __device__(value_idx idx) {return idx;}); @@ -418,8 +421,9 @@ void connect_components(raft::device_resources const& handle, reduction_op.gather(handle, sort_plan.data_handle()); auto X_mutable_view = raft::make_device_matrix_view(const_cast(X), n_rows, n_cols); - raft::matrix::batched_gather( - handle, X_mutable_view, sort_plan.view(), col_batch_size); + auto sort_plan_const_view = raft::make_device_vector_view(sort_plan.data_handle(), n_rows); + raft::matrix::gather( + handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); /** * First compute 1-nn for all colors where the color of each data point @@ -478,8 +482,8 @@ void connect_components(raft::device_resources const& handle, n_rows, stream); - raft::matrix::batched_scatter( - handle, X_mutable_view, sort_plan.view(), col_batch_size); + raft::matrix::scatter( + handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); reduction_op.scatter(handle, sort_plan.data_handle()); /** From 9c7dcef7a4b238382adab3e383c1ce047a3c1896 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Tue, 16 May 2023 13:14:29 -0700 Subject: [PATCH 15/53] Docstring change --- cpp/include/raft/matrix/detail/gather.cuh | 12 ++++++++---- cpp/include/raft/matrix/detail/scatter.cuh | 18 ++++++++++-------- cpp/include/raft/matrix/gather.cuh | 8 +++++--- cpp/include/raft/matrix/scatter.cuh | 15 ++++++++------- .../neighbors/detail/connect_components.cuh | 8 ++++---- 5 files changed, 35 insertions(+), 26 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 783269f86b..5e15dc10fd 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -351,18 +351,22 @@ void gather_if(const InputIteratorT in, /** * In-place gather elements in a row-major matrix according to a - * map. The length of the map is equal to the number of rows. + * map. The length of the map is equal to the number of rows. The + * map specifies new order in which rows are arranged, i.e. in the + * resulting matrix, row[i] would be replaced by row[matrix[i]]. * Batching is done on columns and an additional scratch space of * shape n_rows * cols_batch_size is created. For each batch, chunks * of columns from each row are copied into the appropriate location * in the scratch space and copied back to the corresponding locations * in the input matrix. - * @tparam value_idx - * @tparam value_t + * @tparam InputIteratorT + * @tparam MapIteratorT + * @tparam IndexT + * * @param[in] handle raft handle * @param[inout] in input matrix (n_rows * n_cols) * @param[in] map map containing the order in which rows are to be rearranged (n_rows) - * @param batch_size column batch size + * @param[in] batch_size column batch size */ template void gather(raft::device_resources const& handle, diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter.cuh index 96cd9c5321..56e6874c3a 100644 --- a/cpp/include/raft/matrix/detail/scatter.cuh +++ b/cpp/include/raft/matrix/detail/scatter.cuh @@ -30,23 +30,25 @@ namespace detail { /** * In-place scatter elements in a row-major matrix according to a - * map. The length of the map is equal to the number of rows. - * Batching is done on columns and an additional scratch space of - * shape n_rows * cols_batch_size is created. For each batch, chunks - * of columns from each row are copied into the appropriate location - * in the scratch space and copied back to the corresponding locations - * in the input matrix. + * map. The length of the map is equal to the number of rows. The + * map specifies the destination index for each row, i.e. in the + * resulting matrix, row[map[i]] would be row[i]. Batching is done on + * columns and an additional scratch space of shape n_rows * cols_batch_size + * is created. For each batch, chunks of columns from each row are copied + * into the appropriate location in the scratch space and copied back to + * the corresponding locations in the input matrix. * @tparam InputIteratorT + * @tparam MapIteratorT * @tparam IndexT * * @param[in] handle raft handle - * @param[inout] in input matrix (n_rows * n_cols) + * @param[inout] inout input matrix (n_rows * n_cols) * @param[in] map map containing the destination index for each row (n_rows) * @param[in] batch_size column batch size */ template void scatter(raft::device_resources const& handle, - raft::device_matrix_view in, + raft::device_matrix_view inout, raft::device_vector_view map, IndexT batch_size) { diff --git a/cpp/include/raft/matrix/gather.cuh b/cpp/include/raft/matrix/gather.cuh index 651dc601ac..fc17bf8689 100644 --- a/cpp/include/raft/matrix/gather.cuh +++ b/cpp/include/raft/matrix/gather.cuh @@ -289,13 +289,15 @@ void gather_if(const raft::device_resources& handle, } /** - * @brief In-place gather elements in a row-major matrix according to a - * map. The length of the map is equal to the number of rows. + * In-place gather elements in a row-major matrix according to a + * map. The length of the map is equal to the number of rows. The + * map specifies new order in which rows are arranged, i.e. in the + * resulting matrix, row[i] would be replaced by row[matrix[i]]. * Batching is done on columns and an additional scratch space of * shape n_rows * cols_batch_size is created. For each batch, chunks * of columns from each row are copied into the appropriate location * in the scratch space and copied back to the corresponding locations - * in the input matrix + * in the input matrix. * * @tparam matrix_t * @tparam map_t diff --git a/cpp/include/raft/matrix/scatter.cuh b/cpp/include/raft/matrix/scatter.cuh index 37e0e38af2..dbc37c1161 100644 --- a/cpp/include/raft/matrix/scatter.cuh +++ b/cpp/include/raft/matrix/scatter.cuh @@ -22,13 +22,14 @@ namespace raft::matrix { /** - * @brief In-place scatter elements in a row-major matrix according to a - * map. The length of the map is equal to the number of rows. - * Batching is done on columns and an additional scratch space of - * shape n_rows * cols_batch_size is created. For each batch, chunks - * of columns from each row are copied into the appropriate location - * in the scratch space and copied back to the corresponding locations - * in the input matrix. + * In-place scatter elements in a row-major matrix according to a + * map. The length of the map is equal to the number of rows. The + * map specifies the destination index for each row, i.e. in the + * resulting matrix, row[map[i]] would be row[i]. Batching is done on + * columns and an additional scratch space of shape n_rows * cols_batch_size + * is created. For each batch, chunks of columns from each row are copied + * into the appropriate location in the scratch space and copied back to + * the corresponding locations in the input matrix. * * @tparam matrix_t * @tparam map_t diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 75ba9bdc94..ddc8dd0637 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -231,7 +231,9 @@ void perform_1nn(raft::device_resources const& handle, using OutT = raft::KeyValuePair; using ParamT = raft::distance::masked_l2_nn_params; - ParamT params{reduction_op, reduction_op, true, true}; + bool apply_sqrt = true; + bool init_out_buffer = true; + ParamT params{reduction_op, reduction_op, apply_sqrt, init_out_buffer}; size_t n_batches = raft::ceildiv(n_rows, batch_size); for (size_t bid = 0; bid < n_batches; bid++) { @@ -326,9 +328,7 @@ __global__ void min_components_by_color_kernel(value_idx* out_rows, * @tparam value_idx * @tparam value_t * @param[out] coo output edge list - * @param[in] out_indptr output indptr for ordering edge list - * @param[in] colors_indptr indptr of source components - * @param[in] colors_nn components of nearest neighbors to each source component + * @param[in] out_index output indptr for ordering edge list * @param[in] indices indices of source vertices for each component * @param[in] kvp indices and distances of each destination vertex for each component * @param[in] n_colors number of components From e050d4c5547862a9ff0a884c1a0e152ac4f9866a Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Tue, 16 May 2023 14:44:32 -0700 Subject: [PATCH 16/53] Styling changes --- cpp/include/raft/matrix/batched_rearrange.cuh | 40 +++++-------- cpp/include/raft/matrix/detail/gather.cuh | 41 +++++++------ cpp/include/raft/matrix/detail/scatter.cuh | 37 ++++++------ cpp/include/raft/matrix/gather.cuh | 21 +++---- cpp/include/raft/matrix/scatter.cuh | 31 ++++------ .../neighbors/detail/connect_components.cuh | 60 ++++++++----------- 6 files changed, 105 insertions(+), 125 deletions(-) diff --git a/cpp/include/raft/matrix/batched_rearrange.cuh b/cpp/include/raft/matrix/batched_rearrange.cuh index ba1da50b6b..5faecbc370 100644 --- a/cpp/include/raft/matrix/batched_rearrange.cuh +++ b/cpp/include/raft/matrix/batched_rearrange.cuh @@ -31,7 +31,7 @@ namespace matrix { * of columns from each row are copied into the appropriate location * in the scratch space and copied back to the corresponding locations * in the input matrix - * + * * @tparam value_idx * @tparam value_t * @@ -46,18 +46,14 @@ void batched_gather(raft::device_resources const& handle, raft::device_vector_view map, size_t col_batch_size) { - IdxT m = in.extent(0); - IdxT n = in.extent(1); - IdxT map_len = map.extent(0); - RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= (size_t)n, "col_batch_size should be > 0 and <= n"); + IdxT m = in.extent(0); + IdxT n = in.extent(1); + IdxT map_len = map.extent(0); + RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= (size_t)n, + "col_batch_size should be > 0 and <= n"); RAFT_EXPECTS(map_len == m, "size of map should be equal to the number of rows in input matrix"); - - detail::batched_gather(handle, - in.data_handle(), - map.data_handle(), - n, - m, - col_batch_size); + + detail::batched_gather(handle, in.data_handle(), map.data_handle(), n, m, col_batch_size); } /** @@ -68,7 +64,7 @@ void batched_gather(raft::device_resources const& handle, * of columns from each row are copied into the appropriate location * in the scratch space and copied back to the corresponding locations * in the input matrix - * + * * @tparam value_idx * @tparam value_t * @@ -83,18 +79,14 @@ void batched_scatter(raft::device_resources const& handle, raft::device_vector_view map, size_t col_batch_size) { - IdxT m = in.extent(0); - IdxT n = in.extent(1); - IdxT map_len = map.extent(0); - RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= (size_t)n, "col_batch_size should be > 0 and <= n"); + IdxT m = in.extent(0); + IdxT n = in.extent(1); + IdxT map_len = map.extent(0); + RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= (size_t)n, + "col_batch_size should be > 0 and <= n"); RAFT_EXPECTS(map_len == m, "size of map should be equal to the number of rows in input matrix"); - - detail::batched_scatter(handle, - in.data_handle(), - map.data_handle(), - n, - m, - col_batch_size); + + detail::batched_scatter(handle, in.data_handle(), map.data_handle(), n, m, col_batch_size); } }; // end namespace matrix }; // end namespace raft \ No newline at end of file diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 5e15dc10fd..afc1eea9ae 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -15,13 +15,13 @@ */ #pragma once -#include -#include +#include #include +#include +#include #include -#include +#include #include -#include #include namespace raft { @@ -374,37 +374,40 @@ void gather(raft::device_resources const& handle, raft::device_vector_view map, IndexT batch_size) { - IndexT m = in.extent(0); - IndexT n = in.extent(1); + IndexT m = in.extent(0); + IndexT n = in.extent(1); auto exec_policy = handle.get_thrust_policy(); IndexT n_batches = raft::ceildiv(n, batch_size); for (IndexT bid = 0; bid < n_batches; bid++) { IndexT batch_offset = bid * batch_size; IndexT cols_per_batch = min(batch_size, n - batch_offset); - auto scratch_space = raft::make_device_vector(handle, n * cols_per_batch); + auto scratch_space = + raft::make_device_vector(handle, n * cols_per_batch); - auto scatter_op = - [in = in.data_handle(), map = map.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__( - auto idx) { - IndexT row = idx / cols_per_batch; - IndexT col = idx % cols_per_batch; - return in[map[row] * n + batch_offset + col]; - }; + auto scatter_op = [in = in.data_handle(), + map = map.data_handle(), + batch_offset, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + n] __device__(auto idx) { + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; + return in[map[row] * n + batch_offset + col]; + }; raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); - auto copy_op = [in = in.data_handle(), - map = map.data_handle(), + auto copy_op = [in = in.data_handle(), + map = map.data_handle(), scratch_space = scratch_space.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__(auto idx) { - IndexT row = idx / cols_per_batch; - IndexT col = idx % cols_per_batch; + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; return in[row * n + batch_offset + col] = scratch_space[idx]; }; auto counting = thrust::make_counting_iterator(0); thrust::for_each(exec_policy, counting, counting + n * batch_size, copy_op); - } + } } } // namespace detail diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter.cuh index 56e6874c3a..3374731e3a 100644 --- a/cpp/include/raft/matrix/detail/scatter.cuh +++ b/cpp/include/raft/matrix/detail/scatter.cuh @@ -16,12 +16,12 @@ #pragma once #include -#include +#include #include +#include #include -#include +#include #include -#include #include namespace raft { @@ -52,8 +52,8 @@ void scatter(raft::device_resources const& handle, raft::device_vector_view map, IndexT batch_size) { - IndexT m = in.extent(0); - IndexT n = in.extent(1); + IndexT m = in.extent(0); + IndexT n = in.extent(1); auto stream = handle.get_stream(); auto exec_policy = handle.get_thrust_policy(); @@ -63,24 +63,27 @@ void scatter(raft::device_resources const& handle, for (IndexT bid = 0; bid < n_batches; bid++) { IndexT batch_offset = bid * batch_size; IndexT cols_per_batch = min(batch_size, n - batch_offset); - auto scratch_space = raft::make_device_vector(handle, m * cols_per_batch); + auto scratch_space = + raft::make_device_vector(handle, m * cols_per_batch); - auto scatter_op = - [in = in.data_handle(), map = map.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__( - auto idx) { - IndexT row = idx / cols_per_batch; - IndexT col = idx % cols_per_batch; - return in[row * n + batch_offset + col]; - }; + auto scatter_op = [in = in.data_handle(), + map = map.data_handle(), + batch_offset, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + n] __device__(auto idx) { + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; + return in[row * n + batch_offset + col]; + }; raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); - auto copy_op = [in = in.data_handle(), - map = map.data_handle(), + auto copy_op = [in = in.data_handle(), + map = map.data_handle(), scratch_space = scratch_space.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__(auto idx) { - IndexT row = idx / cols_per_batch; - IndexT col = idx % cols_per_batch; + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; in[map[row] * n + batch_offset + col] = scratch_space[idx]; }; auto counting = thrust::make_counting_iterator(0); diff --git a/cpp/include/raft/matrix/gather.cuh b/cpp/include/raft/matrix/gather.cuh index fc17bf8689..8f43e02427 100644 --- a/cpp/include/raft/matrix/gather.cuh +++ b/cpp/include/raft/matrix/gather.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -298,7 +298,7 @@ void gather_if(const raft::device_resources& handle, * of columns from each row are copied into the appropriate location * in the scratch space and copied back to the corresponding locations * in the input matrix. - * + * * @tparam matrix_t * @tparam map_t * @tparam idx_t @@ -308,24 +308,19 @@ void gather_if(const raft::device_resources& handle, * @param[in] map map containing the order in which rows are to be rearranged (n_rows) * @param[in] col_batch_size column batch size */ -template +template void gather(raft::device_resources const& handle, raft::device_matrix_view in, raft::device_vector_view map, idx_t col_batch_size) { - idx_t m = in.extent(0); - idx_t n = in.extent(1); - idx_t map_len = map.extent(0); + idx_t m = in.extent(0); + idx_t n = in.extent(1); + idx_t map_len = map.extent(0); RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= n, "col_batch_size should be > 0 and <= n"); RAFT_EXPECTS(map_len == m, "size of map should be equal to the number of rows in input matrix"); - - detail::gather(handle, - in, - map, - col_batch_size); + + detail::gather(handle, in, map, col_batch_size); } /** @} */ // end of group matrix_gather diff --git a/cpp/include/raft/matrix/scatter.cuh b/cpp/include/raft/matrix/scatter.cuh index dbc37c1161..cb9bbb26f6 100644 --- a/cpp/include/raft/matrix/scatter.cuh +++ b/cpp/include/raft/matrix/scatter.cuh @@ -1,5 +1,5 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. +/** + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ namespace raft::matrix { * is created. For each batch, chunks of columns from each row are copied * into the appropriate location in the scratch space and copied back to * the corresponding locations in the input matrix. - * + * * @tparam matrix_t * @tparam map_t * @tparam idx_t @@ -40,24 +40,19 @@ namespace raft::matrix { * @param[in] map map containing the order in which rows are to be rearranged (n_rows) * @param[in] col_batch_size column batch size */ -template +template void scatter(raft::device_resources const& handle, - raft::device_matrix_view in, - raft::device_vector_view map, - idx_t col_batch_size) + raft::device_matrix_view in, + raft::device_vector_view map, + idx_t col_batch_size) { - idx_t m = in.extent(0); - idx_t n = in.extent(1); - idx_t map_len = map.extent(0); + idx_t m = in.extent(0); + idx_t n = in.extent(1); + idx_t map_len = map.extent(0); RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= n, "col_batch_size should be > 0 and <= n"); RAFT_EXPECTS(map_len == m, "size of map should be equal to the number of rows in input matrix"); - - detail::scatter(handle, - in, - map, - col_batch_size); + + detail::scatter(handle, in, map, col_batch_size); } -} \ No newline at end of file +} // namespace raft::matrix \ No newline at end of file diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index ddc8dd0637..23d95994c1 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -15,15 +15,15 @@ */ #pragma once -#include "raft/core/device_mdarray.hpp" -#include "raft/core/device_mdspan.hpp" -#include "raft/linalg/map.cuh" #include #include +#include +#include #include #include #include +#include #include #include #include @@ -216,14 +216,9 @@ void perform_1nn(raft::device_resources const& handle, raft::sparse::convert::sorted_coo_to_csr( colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); - auto x_norm= raft::make_device_vector(handle, (value_idx)n_rows); - raft::linalg::rowNorm(x_norm.data_handle(), - X, - n_cols, - n_rows, - raft::linalg::L2Norm, - true, - stream); + auto x_norm = raft::make_device_vector(handle, (value_idx)n_rows); + raft::linalg::rowNorm( + x_norm.data_handle(), X, n_cols, n_rows, raft::linalg::L2Norm, true, stream); auto kvp_view = raft::make_device_vector_view, value_idx>(kvp, n_rows); @@ -231,7 +226,7 @@ void perform_1nn(raft::device_resources const& handle, using OutT = raft::KeyValuePair; using ParamT = raft::distance::masked_l2_nn_params; - bool apply_sqrt = true; + bool apply_sqrt = true; bool init_out_buffer = true; ParamT params{reduction_op, reduction_op, apply_sqrt, init_out_buffer}; @@ -243,7 +238,8 @@ void perform_1nn(raft::device_resources const& handle, auto X_view = raft::make_device_matrix_view( X + batch_offset * n_cols, rows_per_batch, n_cols); - auto x_norm_view = raft::make_device_vector_view(x_norm.data_handle() + batch_offset, rows_per_batch); + auto x_norm_view = raft::make_device_vector_view( + x_norm.data_handle() + batch_offset, rows_per_batch); auto mask_op = [colors, n_components = raft::util::FastIntDiv(n_components), batch_offset] __device__(value_idx idx) { @@ -251,7 +247,8 @@ void perform_1nn(raft::device_resources const& handle, value_idx col = idx % n_components; return colors[batch_offset + row] != col; }; - auto adj_view = raft::make_device_matrix_view(adj.data_handle(), rows_per_batch, n_components); + auto adj_view = raft::make_device_matrix_view( + adj.data_handle(), rows_per_batch, n_components); raft::linalg::map_offset(handle, adj_view, mask_op); raft::distance::masked_l2_nn( @@ -379,8 +376,8 @@ void min_components_by_color(raft::sparse::COO& coo, * @param[in] col_batch_size the input data is sorted and 'unsorted' based on color. An additional * scratch space buffer of shape (n_rows, col_batch_size) is created for this. Usually, this * parameter affects the memory consumption more drastically than the col_batch_size with a marginal - * increase in compute time as the col_batch_size is reduced. default 0 indicates that no batching is - * done + * increase in compute time as the col_batch_size is reduced. default 0 indicates that no batching + * is done */ template void connect_components(raft::device_resources const& handle, @@ -393,26 +390,21 @@ void connect_components(raft::device_resources const& handle, size_t row_batch_size, size_t col_batch_size) { - RAFT_EXPECTS(col_batch_size <= n_cols, - "col_batch_size should be >= 0 and <= n_cols"); - RAFT_EXPECTS(row_batch_size <= n_rows, - "row_batch_size should be >= 0 and <= n_rows"); - if (row_batch_size == 0) { - row_batch_size = n_rows; - } - if (col_batch_size == 0) { - col_batch_size = n_cols; - } + RAFT_EXPECTS(col_batch_size <= n_cols, "col_batch_size should be >= 0 and <= n_cols"); + RAFT_EXPECTS(row_batch_size <= n_rows, "row_batch_size should be >= 0 and <= n_rows"); + if (row_batch_size == 0) { row_batch_size = n_rows; } + if (col_batch_size == 0) { col_batch_size = n_cols; } auto stream = handle.get_stream(); rmm::device_uvector colors(n_rows, stream); // Normalize colors so they are drawn from a monotonically increasing set bool zero_based = true; - raft::label::make_monotonic(colors.data(), const_cast(orig_colors), n_rows, stream, zero_based); + raft::label::make_monotonic( + colors.data(), const_cast(orig_colors), n_rows, stream, zero_based); auto sort_plan = raft::make_device_vector(handle, (value_idx)n_rows); - raft::linalg::map_offset(handle, sort_plan.view(), [] __device__(value_idx idx) {return idx;}); + raft::linalg::map_offset(handle, sort_plan.view(), [] __device__(value_idx idx) { return idx; }); thrust::sort_by_key( handle.get_thrust_policy(), colors.data(), colors.data() + n_rows, sort_plan.data_handle()); @@ -420,10 +412,11 @@ void connect_components(raft::device_resources const& handle, // Modify the reduction operation based on the sort plan. This is particularly needed for HDBSCAN reduction_op.gather(handle, sort_plan.data_handle()); - auto X_mutable_view = raft::make_device_matrix_view(const_cast(X), n_rows, n_cols); - auto sort_plan_const_view = raft::make_device_vector_view(sort_plan.data_handle(), n_rows); - raft::matrix::gather( - handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); + auto X_mutable_view = + raft::make_device_matrix_view(const_cast(X), n_rows, n_cols); + auto sort_plan_const_view = + raft::make_device_vector_view(sort_plan.data_handle(), n_rows); + raft::matrix::gather(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); /** * First compute 1-nn for all colors where the color of each data point @@ -482,8 +475,7 @@ void connect_components(raft::device_resources const& handle, n_rows, stream); - raft::matrix::scatter( - handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); + raft::matrix::scatter(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); reduction_op.scatter(handle, sort_plan.data_handle()); /** From 2162de323fd7543e41a2a2d439edb7dc08961142 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Thu, 18 May 2023 08:15:51 -0700 Subject: [PATCH 17/53] Some changes after PR reviews --- cpp/include/raft/matrix/detail/gather.cuh | 9 ++++++--- cpp/include/raft/matrix/detail/scatter.cuh | 13 ++++++++----- cpp/include/raft/matrix/gather.cuh | 8 +++++--- cpp/include/raft/matrix/scatter.cuh | 6 ++++-- .../neighbors/detail/connect_components.cuh | 19 ++++++++++--------- 5 files changed, 33 insertions(+), 22 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 1e9b5c3162..76a6093002 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -350,15 +350,18 @@ void gather_if(const InputIteratorT in, } /** - * In-place gather elements in a row-major matrix according to a + * @brief In-place gather elements in a row-major matrix according to a * map. The length of the map is equal to the number of rows. The - * map specifies new order in which rows are arranged, i.e. in the - * resulting matrix, row[i] would be replaced by row[matrix[i]]. + * map specifies new order in which rows of the input matrix are rearranged, + * i.e. in the resulting matrix, row i is assigned to the position map[i]. + * example, the matrix [[1, 2, 3], [4, 5, 6], [7, 8, 9]] with the + * map [2, 0, 1] will be transformed to [[7, 8, 9], [1, 2, 3], [4, 5, 6]]. * Batching is done on columns and an additional scratch space of * shape n_rows * cols_batch_size is created. For each batch, chunks * of columns from each row are copied into the appropriate location * in the scratch space and copied back to the corresponding locations * in the input matrix. + * * @tparam InputIteratorT * @tparam MapIteratorT * @tparam IndexT diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter.cuh index 5f3c9699b3..ed2218fd38 100644 --- a/cpp/include/raft/matrix/detail/scatter.cuh +++ b/cpp/include/raft/matrix/detail/scatter.cuh @@ -31,14 +31,17 @@ namespace matrix { namespace detail { /** - * In-place scatter elements inout a row-major matrix according to a + * @brief In-place scatter elements in a row-major matrix according to a * map. The length of the map is equal to the number of rows. The - * map specifies the destination index for each row, i.e. inout the - * resulting matrix, row[map[i]] would be row[i]. Batching is done on + * map specifies the destination index for each row, i.e. in the + * resulting matrix, row map[i] is assigned to row i. For example, + * the matrix [[1, 2, 3], [4, 5, 6], [7, 8, 9]] with the map [2, 0, 1] will + * be transformed to [[4, 5, 6], [7, 8, 9], [1, 2, 3]]. Batching is done on * columns and an additional scratch space of shape n_rows * cols_batch_size * is created. For each batch, chunks of columns from each row are copied - * into the appropriate location inout the scratch space and copied back to - * the corresponding locations inout the input matrix. + * into the appropriate location in the scratch space and copied back to + * the corresponding locations in the input matrix. + * * @tparam InputIteratorT * @tparam MapIteratorT * @tparam IndexT diff --git a/cpp/include/raft/matrix/gather.cuh b/cpp/include/raft/matrix/gather.cuh index a8e1ae632e..6b62f72aeb 100644 --- a/cpp/include/raft/matrix/gather.cuh +++ b/cpp/include/raft/matrix/gather.cuh @@ -290,10 +290,12 @@ void gather_if(const raft::resources& handle, } /** - * In-place gather elements in a row-major matrix according to a + * @brief In-place gather elements in a row-major matrix according to a * map. The length of the map is equal to the number of rows. The - * map specifies new order in which rows are arranged, i.e. in the - * resulting matrix, row[i] would be replaced by row[matrix[i]]. + * map specifies new order in which rows of the input matrix are rearranged, + * i.e. in the resulting matrix, row i is assigned to the position map[i]. + * example, the matrix [[1, 2, 3], [4, 5, 6], [7, 8, 9]] with the + * map [2, 0, 1] will be transformed to [[7, 8, 9], [1, 2, 3], [4, 5, 6]]. * Batching is done on columns and an additional scratch space of * shape n_rows * cols_batch_size is created. For each batch, chunks * of columns from each row are copied into the appropriate location diff --git a/cpp/include/raft/matrix/scatter.cuh b/cpp/include/raft/matrix/scatter.cuh index 36692b6800..6b1f5078e0 100644 --- a/cpp/include/raft/matrix/scatter.cuh +++ b/cpp/include/raft/matrix/scatter.cuh @@ -22,10 +22,12 @@ namespace raft::matrix { /** - * In-place scatter elements in a row-major matrix according to a + * @brief In-place scatter elements in a row-major matrix according to a * map. The length of the map is equal to the number of rows. The * map specifies the destination index for each row, i.e. in the - * resulting matrix, row[map[i]] would be row[i]. Batching is done on + * resulting matrix, row map[i] is assigned to row i. For example, + * the matrix [[1, 2, 3], [4, 5, 6], [7, 8, 9]] with the map [2, 0, 1] will + * be transformed to [[4, 5, 6], [7, 8, 9], [1, 2, 3]]. Batching is done on * columns and an additional scratch space of shape n_rows * cols_batch_size * is created. For each batch, chunks of columns from each row are copied * into the appropriate location in the scratch space and copied back to diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 16fa8c043b..628586a9ea 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -37,7 +37,6 @@ #include #include #include -#include #include @@ -216,7 +215,8 @@ void perform_1nn(raft::resources const& handle, size_t batch_size, red_op reduction_op) { - auto stream = resource::get_cuda_stream(handle); + auto stream = resource::get_cuda_stream(handle); + auto exec_policy = resource::get_thrust_policy(handle); value_idx n_components = get_n_components(colors, n_rows, stream); @@ -275,7 +275,7 @@ void perform_1nn(raft::resources const& handle, kvp_view); } LookupColorOp extract_colors_op(colors); - thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors, extract_colors_op); + thrust::transform(exec_policy, kvp, kvp + n_rows, nn_colors, extract_colors_op); } /** @@ -291,21 +291,22 @@ void perform_1nn(raft::resources const& handle, * @param stream stream for which to order CUDA operations */ template -void sort_by_color(value_idx* colors, +void sort_by_color(raft::resources const& handle, + value_idx* colors, value_idx* nn_colors, raft::KeyValuePair* kvp, value_idx* src_indices, - size_t n_rows, - cudaStream_t stream) + size_t n_rows) { + auto exec_policy = resource::get_thrust_policy(handle); thrust::counting_iterator arg_sort_iter(0); - thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows, src_indices); + thrust::copy(exec_policy, arg_sort_iter, arg_sort_iter + n_rows, src_indices); auto keys = thrust::make_zip_iterator( thrust::make_tuple(colors, nn_colors, (KeyValuePair*)kvp)); auto vals = thrust::make_zip_iterator(thrust::make_tuple(src_indices)); // get all the colors in contiguous locations so we can map them to warps. - thrust::sort_by_key(rmm::exec_policy(stream), keys, keys + n_rows, vals, TupleComp()); + thrust::sort_by_key(exec_policy, keys, keys + n_rows, vals, TupleComp()); } template @@ -456,7 +457,7 @@ void connect_components(raft::resources const& handle, // max_color + 1 = number of connected components // sort nn_colors by key w/ original colors sort_by_color( - colors.data(), nn_colors.data(), temp_inds_dists.data(), src_indices.data(), n_rows, stream); + handle, colors.data(), nn_colors.data(), temp_inds_dists.data(), src_indices.data(), n_rows); /** * Take the min for any duplicate colors From 13855737200b953be226e2ef1c6c4c6b29f11003 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 19 May 2023 12:26:48 -0700 Subject: [PATCH 18/53] rbug fixes --- cpp/include/raft/matrix/detail/gather.cuh | 19 +++---- cpp/include/raft/matrix/detail/scatter.cuh | 1 + .../neighbors/detail/connect_components.cuh | 49 +++++++++++++++---- .../sparse/neighbors/connect_components.cu | 4 +- 4 files changed, 52 insertions(+), 21 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 76a6093002..268a9a01c2 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -367,18 +367,18 @@ void gather_if(const InputIteratorT in, * @tparam IndexT * * @param[in] handle raft handle - * @param[inout] in input matrix (n_rows * n_cols) + * @param[inout] inout input matrix (n_rows * n_cols) * @param[in] map map containing the order in which rows are to be rearranged (n_rows) * @param[in] batch_size column batch size */ template void gather(raft::resources const& handle, - raft::device_matrix_view in, + raft::device_matrix_view inout, raft::device_vector_view map, IndexT batch_size) { - IndexT m = in.extent(0); - IndexT n = in.extent(1); + IndexT m = inout.extent(0); + IndexT n = inout.extent(1); auto exec_policy = resource::get_thrust_policy(handle); IndexT n_batches = raft::ceildiv(n, batch_size); @@ -386,19 +386,19 @@ void gather(raft::resources const& handle, IndexT batch_offset = bid * batch_size; IndexT cols_per_batch = min(batch_size, n - batch_offset); auto scratch_space = - raft::make_device_vector(handle, n * cols_per_batch); + raft::make_device_vector(handle, m * cols_per_batch); - auto scatter_op = [in = in.data_handle(), + auto scatter_op = [inout = inout.data_handle(), map = map.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__(auto idx) { IndexT row = idx / cols_per_batch; IndexT col = idx % cols_per_batch; - return in[map[row] * n + batch_offset + col]; + return inout[map[row] * n + batch_offset + col]; }; raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); - auto copy_op = [in = in.data_handle(), + auto copy_op = [inout = inout.data_handle(), map = map.data_handle(), scratch_space = scratch_space.data_handle(), batch_offset, @@ -406,7 +406,8 @@ void gather(raft::resources const& handle, n] __device__(auto idx) { IndexT row = idx / cols_per_batch; IndexT col = idx % cols_per_batch; - return in[row * n + batch_offset + col] = scratch_space[idx]; + inout[row * n + batch_offset + col] = scratch_space[idx]; + return; }; auto counting = thrust::make_counting_iterator(0); thrust::for_each(exec_policy, counting, counting + n * batch_size, copy_op); diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter.cuh index ed2218fd38..63f804ff99 100644 --- a/cpp/include/raft/matrix/detail/scatter.cuh +++ b/cpp/include/raft/matrix/detail/scatter.cuh @@ -90,6 +90,7 @@ void scatter(raft::resources const& handle, IndexT row = idx / cols_per_batch; IndexT col = idx % cols_per_batch; inout[map[row] * n + batch_offset + col] = scratch_space[idx]; + return; }; auto counting = thrust::make_counting_iterator(0); thrust::for_each(exec_policy, counting, counting + m * batch_size, copy_op); diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 628586a9ea..732bfca576 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include "raft/core/logger-macros.hpp" #include #include #include @@ -215,6 +216,7 @@ void perform_1nn(raft::resources const& handle, size_t batch_size, red_op reduction_op) { + RAFT_LOG_INFO("perform_1nn start"); auto stream = resource::get_cuda_stream(handle); auto exec_policy = resource::get_thrust_policy(handle); @@ -227,12 +229,14 @@ void perform_1nn(raft::resources const& handle, auto colors_group_idxs = raft::make_device_vector(handle, n_components + 1); raft::sparse::convert::sorted_coo_to_csr( colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); + + auto group_idxs_view = raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components); auto x_norm = raft::make_device_vector(handle, (value_idx)n_rows); raft::linalg::rowNorm( x_norm.data_handle(), X, n_cols, n_rows, raft::linalg::L2Norm, true, stream); - auto kvp_view = - raft::make_device_vector_view, value_idx>(kvp, n_rows); + + RAFT_LOG_INFO("X norm computed"); auto adj = raft::make_device_matrix(handle, batch_size, n_components); using OutT = raft::KeyValuePair; @@ -242,16 +246,26 @@ void perform_1nn(raft::resources const& handle, bool init_out_buffer = true; ParamT params{reduction_op, reduction_op, apply_sqrt, init_out_buffer}; + auto X_full_view = raft::make_device_matrix_view( + X, n_rows, n_cols); + size_t n_batches = raft::ceildiv(n_rows, batch_size); + RAFT_LOG_INFO("n_batches %zu", n_batches); for (size_t bid = 0; bid < n_batches; bid++) { + RAFT_LOG_INFO("current batch bid %zu", bid); size_t batch_offset = bid * batch_size; size_t rows_per_batch = min(batch_size, n_rows - batch_offset); + RAFT_LOG_INFO("rows_per_batch %zu", rows_per_batch); - auto X_view = raft::make_device_matrix_view( + auto X_batch_view = raft::make_device_matrix_view( X + batch_offset * n_cols, rows_per_batch, n_cols); + + RAFT_LOG_INFO("X_batch_view created"); - auto x_norm_view = raft::make_device_vector_view( + auto x_norm_batch_view = raft::make_device_vector_view( x_norm.data_handle() + batch_offset, rows_per_batch); + + RAFT_LOG_INFO("X_norm_batch_view created"); auto mask_op = [colors, n_components = raft::util::FastIntDiv(n_components), batch_offset] __device__(value_idx idx) { @@ -261,18 +275,29 @@ void perform_1nn(raft::resources const& handle, }; auto adj_view = raft::make_device_matrix_view( adj.data_handle(), rows_per_batch, n_components); + + RAFT_LOG_INFO("adj view created"); raft::linalg::map_offset(handle, adj_view, mask_op); + RAFT_LOG_INFO("adj map_offset done"); + auto kvp_view = + raft::make_device_vector_view, value_idx>(kvp + batch_offset, rows_per_batch); + + RAFT_LOG_INFO("kvp view created"); + cudaDeviceSynchronize(); + RAFT_LOG_INFO("bid %d Done until start of masked_nn", bid); raft::distance::masked_l2_nn( handle, params, - X_view, - X_view, - x_norm_view, - x_norm_view, + X_batch_view, + X_full_view, + x_norm_batch_view, + x_norm.view(), adj_view, - raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components), + group_idxs_view, kvp_view); + cudaDeviceSynchronize(); + RAFT_LOG_INFO("bid %d Done until end of masked_nn", bid); } LookupColorOp extract_colors_op(colors); thrust::transform(exec_policy, kvp, kvp + n_rows, nn_colors, extract_colors_op); @@ -403,6 +428,7 @@ void connect_components(raft::resources const& handle, size_t row_batch_size, size_t col_batch_size) { + RAFT_LOG_INFO("connect_components_start"); RAFT_EXPECTS(col_batch_size <= n_cols, "col_batch_size should be >= 0 and <= n_cols"); RAFT_EXPECTS(row_batch_size <= n_rows, "row_batch_size should be >= 0 and <= n_rows"); if (row_batch_size == 0) { row_batch_size = n_rows; } @@ -423,7 +449,8 @@ void connect_components(raft::resources const& handle, colors.data(), colors.data() + n_rows, sort_plan.data_handle()); - + + RAFT_LOG_INFO("sort plan created"); // Modify the reduction operation based on the sort plan. This is particularly needed for HDBSCAN reduction_op.gather(handle, sort_plan.data_handle()); @@ -433,6 +460,7 @@ void connect_components(raft::resources const& handle, raft::make_device_vector_view(sort_plan.data_handle(), n_rows); raft::matrix::gather(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); + RAFT_LOG_INFO("X mutable view created"); /** * First compute 1-nn for all colors where the color of each data point * is guaranteed to be != color of its nearest neighbor. @@ -459,6 +487,7 @@ void connect_components(raft::resources const& handle, sort_by_color( handle, colors.data(), nn_colors.data(), temp_inds_dists.data(), src_indices.data(), n_rows); + RAFT_LOG_INFO("sort_by_colors done"); /** * Take the min for any duplicate colors */ diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index f50d62cf15..babe940e81 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -124,8 +124,8 @@ class ConnectComponentsTest params.n_row, params.n_col, red_op, - params.n_row, - params.n_col); + 3, + 2); /** * Construct final edge list From a7ba987a9adcf5e8f04e1a57624035ff00f90e9b Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 19 May 2023 13:32:32 -0700 Subject: [PATCH 19/53] Bug fixes --- cpp/CMakeLists.txt | 8 ++- cpp/include/raft/matrix/detail/gather.cuh | 12 ++-- cpp/include/raft/matrix/detail/scatter.cuh | 3 +- .../neighbors/detail/connect_components.cuh | 60 +++++++------------ .../sparse/neighbors/connect_components.cu | 4 +- 5 files changed, 35 insertions(+), 52 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index eb35554768..3d498783f3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -602,7 +602,9 @@ target_link_libraries(raft::raft INTERFACE # Use `rapids_export` for 22.04 as it will have COMPONENT support rapids_export( INSTALL raft - EXPORT_SET raft-exports COMPONENTS ${raft_components} COMPONENTS_EXPORT_SET ${raft_export_sets} + EXPORT_SET raft-exports + COMPONENTS ${raft_components} + COMPONENTS_EXPORT_SET ${raft_export_sets} GLOBAL_TARGETS raft compiled distributed NAMESPACE raft:: DOCUMENTATION doc_string @@ -613,7 +615,9 @@ rapids_export( # * build export ------------------------------------------------------------- rapids_export( BUILD raft - EXPORT_SET raft-exports COMPONENTS ${raft_components} COMPONENTS_EXPORT_SET ${raft_export_sets} + EXPORT_SET raft-exports + COMPONENTS ${raft_components} + COMPONENTS_EXPORT_SET ${raft_export_sets} GLOBAL_TARGETS raft compiled distributed DOCUMENTATION doc_string NAMESPACE raft:: diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 268a9a01c2..c05af17cb0 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -388,8 +388,8 @@ void gather(raft::resources const& handle, auto scratch_space = raft::make_device_vector(handle, m * cols_per_batch); - auto scatter_op = [inout = inout.data_handle(), - map = map.data_handle(), + auto scatter_op = [inout = inout.data_handle(), + map = map.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__(auto idx) { @@ -398,19 +398,19 @@ void gather(raft::resources const& handle, return inout[map[row] * n + batch_offset + col]; }; raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); - auto copy_op = [inout = inout.data_handle(), + auto copy_op = [inout = inout.data_handle(), map = map.data_handle(), scratch_space = scratch_space.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__(auto idx) { - IndexT row = idx / cols_per_batch; - IndexT col = idx % cols_per_batch; + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; inout[row * n + batch_offset + col] = scratch_space[idx]; return; }; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(exec_policy, counting, counting + n * batch_size, copy_op); + thrust::for_each(exec_policy, counting, counting + m * cols_per_batch, copy_op); } } diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter.cuh index 63f804ff99..c6e4bf5c6e 100644 --- a/cpp/include/raft/matrix/detail/scatter.cuh +++ b/cpp/include/raft/matrix/detail/scatter.cuh @@ -60,7 +60,6 @@ void scatter(raft::resources const& handle, IndexT m = inout.extent(0); IndexT n = inout.extent(1); - auto stream = resource::get_cuda_stream(handle); auto exec_policy = resource::get_thrust_policy(handle); IndexT n_batches = raft::ceildiv(n, batch_size); @@ -93,7 +92,7 @@ void scatter(raft::resources const& handle, return; }; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(exec_policy, counting, counting + m * batch_size, copy_op); + thrust::for_each(exec_policy, counting, counting + m * cols_per_batch, copy_op); } } diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 732bfca576..d95d3192aa 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -15,7 +15,6 @@ */ #pragma once -#include "raft/core/logger-macros.hpp" #include #include #include @@ -216,7 +215,6 @@ void perform_1nn(raft::resources const& handle, size_t batch_size, red_op reduction_op) { - RAFT_LOG_INFO("perform_1nn start"); auto stream = resource::get_cuda_stream(handle); auto exec_policy = resource::get_thrust_policy(handle); @@ -229,14 +227,13 @@ void perform_1nn(raft::resources const& handle, auto colors_group_idxs = raft::make_device_vector(handle, n_components + 1); raft::sparse::convert::sorted_coo_to_csr( colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); - - auto group_idxs_view = raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components); + + auto group_idxs_view = + raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components); auto x_norm = raft::make_device_vector(handle, (value_idx)n_rows); raft::linalg::rowNorm( x_norm.data_handle(), X, n_cols, n_rows, raft::linalg::L2Norm, true, stream); - - RAFT_LOG_INFO("X norm computed"); auto adj = raft::make_device_matrix(handle, batch_size, n_components); using OutT = raft::KeyValuePair; @@ -246,26 +243,20 @@ void perform_1nn(raft::resources const& handle, bool init_out_buffer = true; ParamT params{reduction_op, reduction_op, apply_sqrt, init_out_buffer}; - auto X_full_view = raft::make_device_matrix_view( - X, n_rows, n_cols); + auto X_full_view = raft::make_device_matrix_view(X, n_rows, n_cols); size_t n_batches = raft::ceildiv(n_rows, batch_size); - RAFT_LOG_INFO("n_batches %zu", n_batches); + for (size_t bid = 0; bid < n_batches; bid++) { - RAFT_LOG_INFO("current batch bid %zu", bid); size_t batch_offset = bid * batch_size; size_t rows_per_batch = min(batch_size, n_rows - batch_offset); - RAFT_LOG_INFO("rows_per_batch %zu", rows_per_batch); auto X_batch_view = raft::make_device_matrix_view( X + batch_offset * n_cols, rows_per_batch, n_cols); - - RAFT_LOG_INFO("X_batch_view created"); auto x_norm_batch_view = raft::make_device_vector_view( x_norm.data_handle() + batch_offset, rows_per_batch); - - RAFT_LOG_INFO("X_norm_batch_view created"); + auto mask_op = [colors, n_components = raft::util::FastIntDiv(n_components), batch_offset] __device__(value_idx idx) { @@ -275,29 +266,22 @@ void perform_1nn(raft::resources const& handle, }; auto adj_view = raft::make_device_matrix_view( adj.data_handle(), rows_per_batch, n_components); - - RAFT_LOG_INFO("adj view created"); + raft::linalg::map_offset(handle, adj_view, mask_op); - RAFT_LOG_INFO("adj map_offset done"); auto kvp_view = - raft::make_device_vector_view, value_idx>(kvp + batch_offset, rows_per_batch); - - RAFT_LOG_INFO("kvp view created"); - cudaDeviceSynchronize(); - RAFT_LOG_INFO("bid %d Done until start of masked_nn", bid); - raft::distance::masked_l2_nn( - handle, - params, - X_batch_view, - X_full_view, - x_norm_batch_view, - x_norm.view(), - adj_view, - group_idxs_view, - kvp_view); - cudaDeviceSynchronize(); - RAFT_LOG_INFO("bid %d Done until end of masked_nn", bid); + raft::make_device_vector_view, value_idx>( + kvp + batch_offset, rows_per_batch); + + raft::distance::masked_l2_nn(handle, + params, + X_batch_view, + X_full_view, + x_norm_batch_view, + x_norm.view(), + adj_view, + group_idxs_view, + kvp_view); } LookupColorOp extract_colors_op(colors); thrust::transform(exec_policy, kvp, kvp + n_rows, nn_colors, extract_colors_op); @@ -428,7 +412,6 @@ void connect_components(raft::resources const& handle, size_t row_batch_size, size_t col_batch_size) { - RAFT_LOG_INFO("connect_components_start"); RAFT_EXPECTS(col_batch_size <= n_cols, "col_batch_size should be >= 0 and <= n_cols"); RAFT_EXPECTS(row_batch_size <= n_rows, "row_batch_size should be >= 0 and <= n_rows"); if (row_batch_size == 0) { row_batch_size = n_rows; } @@ -449,8 +432,7 @@ void connect_components(raft::resources const& handle, colors.data(), colors.data() + n_rows, sort_plan.data_handle()); - - RAFT_LOG_INFO("sort plan created"); + // Modify the reduction operation based on the sort plan. This is particularly needed for HDBSCAN reduction_op.gather(handle, sort_plan.data_handle()); @@ -460,7 +442,6 @@ void connect_components(raft::resources const& handle, raft::make_device_vector_view(sort_plan.data_handle(), n_rows); raft::matrix::gather(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); - RAFT_LOG_INFO("X mutable view created"); /** * First compute 1-nn for all colors where the color of each data point * is guaranteed to be != color of its nearest neighbor. @@ -487,7 +468,6 @@ void connect_components(raft::resources const& handle, sort_by_color( handle, colors.data(), nn_colors.data(), temp_inds_dists.data(), src_indices.data(), n_rows); - RAFT_LOG_INFO("sort_by_colors done"); /** * Take the min for any duplicate colors */ diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index babe940e81..f50d62cf15 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -124,8 +124,8 @@ class ConnectComponentsTest params.n_row, params.n_col, red_op, - 3, - 2); + params.n_row, + params.n_col); /** * Construct final edge list From 10d5d9d7b0b4f385cb149726f83ba292de7254a0 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 19 May 2023 14:17:58 -0700 Subject: [PATCH 20/53] Remove unnecessary imports --- cpp/include/raft/matrix/detail/gather.cuh | 1 - cpp/include/raft/matrix/detail/scatter.cuh | 2 -- cpp/include/raft/sparse/neighbors/detail/connect_components.cuh | 1 - 3 files changed, 4 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index c05af17cb0..2883f815e6 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -16,7 +16,6 @@ #pragma once #include -#include #include #include #include diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter.cuh index c6e4bf5c6e..850abec22e 100644 --- a/cpp/include/raft/matrix/detail/scatter.cuh +++ b/cpp/include/raft/matrix/detail/scatter.cuh @@ -17,8 +17,6 @@ #include #include -#include -#include #include #include #include diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index d95d3192aa..123ffd916f 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -22,7 +22,6 @@ #include #include -#include #include #include #include From 76031d1e5c0006cfa193c66dd51717fe971ff65b Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Sat, 20 May 2023 17:05:37 -0700 Subject: [PATCH 21/53] Refactor based on Allard's comments --- cpp/include/raft/matrix/detail/gather.cuh | 2 +- cpp/include/raft/matrix/gather.cuh | 2 +- .../neighbors/detail/connect_components.cuh | 83 ++++++++++--------- 3 files changed, 47 insertions(+), 40 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 2883f815e6..d0892da2f5 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -367,7 +367,7 @@ void gather_if(const InputIteratorT in, * * @param[in] handle raft handle * @param[inout] inout input matrix (n_rows * n_cols) - * @param[in] map map containing the order in which rows are to be rearranged (n_rows) + * @param[in] map pointer to the input sequence of gather locations * @param[in] batch_size column batch size */ template diff --git a/cpp/include/raft/matrix/gather.cuh b/cpp/include/raft/matrix/gather.cuh index 6b62f72aeb..069312ce00 100644 --- a/cpp/include/raft/matrix/gather.cuh +++ b/cpp/include/raft/matrix/gather.cuh @@ -308,7 +308,7 @@ void gather_if(const raft::resources& handle, * * @param[in] handle raft handle * @param[inout] inout input matrix (n_rows * n_cols) - * @param[in] map map containing the order in which rows are to be rearranged (n_rows) + * @param[in] map Pointer to the input sequence of gather locations * @param[in] col_batch_size column batch size */ template diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 123ffd916f..8b90b57fcf 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -200,7 +200,8 @@ struct LookupColorOp { * @param[in] X original dense data * @param[in] n_rows number of rows in original dense data * @param[in] n_cols number of columns in original dense data - * @param[in] batch_size batch size for computing nearest neighbors + * @param[in] row_batch_size row batch size for computing nearest neighbors + & @param[in] col_batch_size column batch size for sorting and 'unsorting' * @param[in] reduction_op reduction operation for computing nearest neighbors */ template @@ -211,12 +212,29 @@ void perform_1nn(raft::resources const& handle, const value_t* X, size_t n_rows, size_t n_cols, - size_t batch_size, + size_t row_batch_size, + size_t col_batch_size, red_op reduction_op) { auto stream = resource::get_cuda_stream(handle); auto exec_policy = resource::get_thrust_policy(handle); + auto sort_plan = raft::make_device_vector(handle, (value_idx)n_rows); + raft::linalg::map_offset(handle, sort_plan.view(), [] __device__(value_idx idx) { return idx; }); + + thrust::sort_by_key( + resource::get_thrust_policy(handle), colors, colors + n_rows, sort_plan.data_handle()); + + // Modify the reduction operation based on the sort plan. This is particularly needed for HDBSCAN + reduction_op.gather(handle, sort_plan.data_handle()); + + auto X_mutable_view = + raft::make_device_matrix_view(const_cast(X), n_rows, n_cols); + auto sort_plan_const_view = + raft::make_device_vector_view(sort_plan.data_handle(), n_rows); + raft::matrix::gather(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); + + // Get the number of unique components from the array of colors value_idx n_components = get_n_components(colors, n_rows, stream); // colors_group_idxs is an array containing the *end* indices of each color @@ -234,7 +252,7 @@ void perform_1nn(raft::resources const& handle, raft::linalg::rowNorm( x_norm.data_handle(), X, n_cols, n_rows, raft::linalg::L2Norm, true, stream); - auto adj = raft::make_device_matrix(handle, batch_size, n_components); + auto adj = raft::make_device_matrix(handle, row_batch_size, n_components); using OutT = raft::KeyValuePair; using ParamT = raft::distance::masked_l2_nn_params; @@ -244,11 +262,11 @@ void perform_1nn(raft::resources const& handle, auto X_full_view = raft::make_device_matrix_view(X, n_rows, n_cols); - size_t n_batches = raft::ceildiv(n_rows, batch_size); + size_t n_batches = raft::ceildiv(n_rows, row_batch_size); for (size_t bid = 0; bid < n_batches; bid++) { - size_t batch_offset = bid * batch_size; - size_t rows_per_batch = min(batch_size, n_rows - batch_offset); + size_t batch_offset = bid * row_batch_size; + size_t rows_per_batch = min(row_batch_size, n_rows - batch_offset); auto X_batch_view = raft::make_device_matrix_view( X + batch_offset * n_cols, rows_per_batch, n_cols); @@ -284,6 +302,21 @@ void perform_1nn(raft::resources const& handle, } LookupColorOp extract_colors_op(colors); thrust::transform(exec_policy, kvp, kvp + n_rows, nn_colors, extract_colors_op); + + thrust::transform(exec_policy, + kvp, + kvp + n_rows, + kvp, + [sort_plan = sort_plan.data_handle()] __device__(OutT KVP) { + OutT res; + res.value = KVP.value; + res.key = sort_plan[KVP.key]; + return res; + }); + + raft::matrix::scatter(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); + thrust::scatter(exec_policy, kvp, kvp + n_rows, sort_plan.data_handle(), kvp); + reduction_op.scatter(handle, sort_plan.data_handle()); } /** @@ -323,7 +356,6 @@ __global__ void min_components_by_color_kernel(value_idx* out_rows, value_t* out_vals, const value_idx* out_index, const value_idx* indices, - const value_idx* sort_plan, const raft::KeyValuePair* kvp, size_t nnz) { @@ -334,8 +366,8 @@ __global__ void min_components_by_color_kernel(value_idx* out_rows, int idx = out_index[tid]; if ((tid == 0 || (out_index[tid - 1] != idx))) { - out_rows[idx] = sort_plan[indices[tid]]; - out_cols[idx] = sort_plan[kvp[tid].key]; + out_rows[idx] = indices[tid]; + out_cols[idx] = kvp[tid].key; out_vals[idx] = kvp[tid].value; } } @@ -356,7 +388,6 @@ template void min_components_by_color(raft::sparse::COO& coo, const value_idx* out_index, const value_idx* indices, - const value_idx* sort_plan, const raft::KeyValuePair* kvp, size_t nnz, cudaStream_t stream) @@ -367,7 +398,7 @@ void min_components_by_color(raft::sparse::COO& coo, * the min. */ min_components_by_color_kernel<<>>( - coo.rows(), coo.cols(), coo.vals(), out_index, indices, sort_plan, kvp, nnz); + coo.rows(), coo.cols(), coo.vals(), out_index, indices, kvp, nnz); } /** @@ -424,23 +455,6 @@ void connect_components(raft::resources const& handle, raft::label::make_monotonic( colors.data(), const_cast(orig_colors), n_rows, stream, zero_based); - auto sort_plan = raft::make_device_vector(handle, (value_idx)n_rows); - raft::linalg::map_offset(handle, sort_plan.view(), [] __device__(value_idx idx) { return idx; }); - - thrust::sort_by_key(resource::get_thrust_policy(handle), - colors.data(), - colors.data() + n_rows, - sort_plan.data_handle()); - - // Modify the reduction operation based on the sort plan. This is particularly needed for HDBSCAN - reduction_op.gather(handle, sort_plan.data_handle()); - - auto X_mutable_view = - raft::make_device_matrix_view(const_cast(X), n_rows, n_cols); - auto sort_plan_const_view = - raft::make_device_vector_view(sort_plan.data_handle(), n_rows); - raft::matrix::gather(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); - /** * First compute 1-nn for all colors where the color of each data point * is guaranteed to be != color of its nearest neighbor. @@ -457,6 +471,7 @@ void connect_components(raft::resources const& handle, n_rows, n_cols, row_batch_size, + col_batch_size, reduction_op); /** @@ -490,16 +505,8 @@ void connect_components(raft::resources const& handle, raft::sparse::COO min_edges(stream); min_edges.allocate(size, n_rows, n_rows, true, stream); - min_components_by_color(min_edges, - out_index.data(), - src_indices.data(), - sort_plan.data_handle(), - temp_inds_dists.data(), - n_rows, - stream); - - raft::matrix::scatter(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); - reduction_op.scatter(handle, sort_plan.data_handle()); + min_components_by_color( + min_edges, out_index.data(), src_indices.data(), temp_inds_dists.data(), n_rows, stream); /** * Symmetrize resulting edge list From 87c62a61150c3021b4dbe7dbad7d02b03908cb58 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 22 May 2023 14:27:48 -0700 Subject: [PATCH 22/53] Debugging differences between fused and masked --- cpp/bench/prims/distance/masked_nn.cu | 1 + .../raft/cluster/detail/single_linkage.cuh | 2 +- .../neighbors/detail/connect_components.cuh | 83 ++++++++++++++++--- cpp/test/distance/masked_nn.cu | 5 ++ cpp/test/matrix/gather.cu | 2 + .../sparse/neighbors/connect_components.cu | 2 +- 6 files changed, 82 insertions(+), 13 deletions(-) diff --git a/cpp/bench/prims/distance/masked_nn.cu b/cpp/bench/prims/distance/masked_nn.cu index c804ecb3a1..5f63fa4779 100644 --- a/cpp/bench/prims/distance/masked_nn.cu +++ b/cpp/bench/prims/distance/masked_nn.cu @@ -128,6 +128,7 @@ struct masked_l2_nn : public fixture { dim3 block(32, 32); dim3 grid(10, 10); init_adj<<>>(p.pattern, p.n, adj.view(), group_idxs.view()); + RAFT_CUDA_TRY(cudaGetLastError()); } diff --git a/cpp/include/raft/cluster/detail/single_linkage.cuh b/cpp/include/raft/cluster/detail/single_linkage.cuh index 848ca0357e..ddd422a89b 100644 --- a/cpp/include/raft/cluster/detail/single_linkage.cuh +++ b/cpp/include/raft/cluster/detail/single_linkage.cuh @@ -81,7 +81,7 @@ void single_linkage(raft::resources const& handle, * 2. Construct MST, sorted by weights */ rmm::device_uvector color(m, stream); - raft::sparse::neighbors::FixConnectivitiesRedOp op(m); + raft::sparse::neighbors::FixConnectivitiesRedOp op(color.data(), m); detail::build_sorted_mst(handle, X, indptr.data(), diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 8b90b57fcf..a975af8031 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include "raft/core/logger-macros.hpp" #include #include #include @@ -48,6 +49,9 @@ #include #include +#include +#include + #include #include @@ -65,25 +69,26 @@ namespace raft::sparse::neighbors::detail { */ template struct FixConnectivitiesRedOp { + value_idx* colors; value_idx m; // default constructor for cutlass - DI FixConnectivitiesRedOp() : m(0) {} + DI FixConnectivitiesRedOp() : colors(0), m(0) {} - FixConnectivitiesRedOp(value_idx m_) : m(m_){}; + FixConnectivitiesRedOp(value_idx* colors_, value_idx m_) : colors(colors_), m(m_){}; typedef typename raft::KeyValuePair KVP; DI void operator()(value_idx rit, KVP* out, const KVP& other) const { - if (rit < m && other.value < out->value) { + if (rit < m && other.value < out->value && colors[rit] != colors[other.key]) { out->key = other.key; out->value = other.value; } } - DI KVP operator()(value_idx rit, const KVP& a, const KVP& b) + DI KVP operator()(value_idx rit, const KVP& a, const KVP& b) const { - if (rit < m && a.value < b.value) { + if (rit < m && a.value < b.value && colors[rit] != colors[a.key]) { return a; } else return b; @@ -103,9 +108,17 @@ struct FixConnectivitiesRedOp { DI value_t get_value(value_t& out) const { return out; } - void gather(raft::resources const& handle, value_idx* map) {} + void gather(const raft::resources& handle, value_idx* map) + { + thrust::gather(raft::resource::get_thrust_policy(handle), map, map + m, colors, colors); + } + + void scatter(const raft::resources& handle, value_idx* map) + { + thrust::scatter( + raft::resource::get_thrust_policy(handle), colors, colors + m, map, colors); + } - void scatter(raft::resources const& handle, value_idx* map) {} }; /** @@ -244,6 +257,9 @@ void perform_1nn(raft::resources const& handle, auto colors_group_idxs = raft::make_device_vector(handle, n_components + 1); raft::sparse::convert::sorted_coo_to_csr( colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); + + raft::print_device_vector("colors", colors, n_rows, std::cout); + raft::print_device_vector("color_group_idxs", colors_group_idxs.data_handle(), n_components + 1, std::cout); auto group_idxs_view = raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components); @@ -281,10 +297,16 @@ void perform_1nn(raft::resources const& handle, value_idx col = idx % n_components; return colors[batch_offset + row] != col; }; + + auto adj_vector_view = raft::make_device_vector_view( + adj.data_handle(), rows_per_batch * n_components); + + raft::linalg::map_offset(handle, adj_vector_view, mask_op); + auto adj_view = raft::make_device_matrix_view( adj.data_handle(), rows_per_batch, n_components); - raft::linalg::map_offset(handle, adj_view, mask_op); + raft::print_device_vector("adj", adj.data_handle(), rows_per_batch * n_components, std::cout); auto kvp_view = raft::make_device_vector_view, value_idx>( @@ -300,8 +322,6 @@ void perform_1nn(raft::resources const& handle, group_idxs_view, kvp_view); } - LookupColorOp extract_colors_op(colors); - thrust::transform(exec_policy, kvp, kvp + n_rows, nn_colors, extract_colors_op); thrust::transform(exec_policy, kvp, @@ -316,7 +336,11 @@ void perform_1nn(raft::resources const& handle, raft::matrix::scatter(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); thrust::scatter(exec_policy, kvp, kvp + n_rows, sort_plan.data_handle(), kvp); + thrust::scatter(exec_policy, colors, colors + n_rows, sort_plan.data_handle(), colors); reduction_op.scatter(handle, sort_plan.data_handle()); + + LookupColorOp extract_colors_op(colors); + thrust::transform(exec_policy, kvp, kvp + n_rows, nn_colors, extract_colors_op); } /** @@ -454,6 +478,8 @@ void connect_components(raft::resources const& handle, bool zero_based = true; raft::label::make_monotonic( colors.data(), const_cast(orig_colors), n_rows, stream, zero_based); + + raft::print_device_vector("orig_colors", orig_colors, n_rows, std::cout); /** * First compute 1-nn for all colors where the color of each data point @@ -507,12 +533,47 @@ void connect_components(raft::resources const& handle, min_components_by_color( min_edges, out_index.data(), src_indices.data(), temp_inds_dists.data(), n_rows, stream); - + + raft::sparse::op::coo_sort(n_rows, + n_rows, + min_edges.nnz, + min_edges.rows(), + min_edges.cols(), + min_edges.vals(), + stream); + + rmm::device_uvector min_edges_row_colors(min_edges.nnz, stream); + rmm::device_uvector min_edges_col_colors(min_edges.nnz, stream); + + thrust::transform(rmm::exec_policy(stream), min_edges.rows(), min_edges.rows() + min_edges.nnz, min_edges_row_colors.data(), [orig_colors]__device__(auto idx) {return orig_colors[idx];}); + thrust::transform(rmm::exec_policy(stream), min_edges.cols(), min_edges.cols() + min_edges.nnz, min_edges_col_colors.data(), [orig_colors]__device__(auto idx) {return orig_colors[idx];}); + + + raft::print_device_vector("mnn_min_edges_rows", min_edges.rows(), min_edges.nnz, std::cout); + raft::print_device_vector("mnn_min_edges_cols", min_edges.cols(), min_edges.nnz, std::cout); + raft::print_device_vector("mnn_min_edges_vals", min_edges.vals(), min_edges.nnz, std::cout); + raft::print_device_vector("row_colors", min_edges_row_colors.data(), min_edges.nnz, std::cout); + raft::print_device_vector("col_colors", min_edges_col_colors.data(), min_edges.nnz, std::cout); + /** * Symmetrize resulting edge list */ raft::sparse::linalg::symmetrize( handle, min_edges.rows(), min_edges.cols(), min_edges.vals(), n_rows, n_rows, size, out); + + rmm::device_uvector row_colors(out.nnz, stream); + rmm::device_uvector col_colors(out.nnz, stream); + + thrust::transform(rmm::exec_policy(stream), out.rows(), out.rows() + out.nnz, row_colors.data(), [orig_colors]__device__(auto idx) {return orig_colors[idx];}); + thrust::transform(rmm::exec_policy(stream), out.cols(), out.cols() + out.nnz, col_colors.data(), [orig_colors]__device__(auto idx) {return orig_colors[idx];}); + + + raft::print_device_vector("mnn_out_rows", out.rows(), out.nnz, std::cout); + raft::print_device_vector("mnn_out_cols", out.cols(), out.nnz, std::cout); + raft::print_device_vector("mnn_out_vals", out.vals(), out.nnz, std::cout); + raft::print_device_vector("row_colors", row_colors.data(), out.nnz, std::cout); + raft::print_device_vector("col_colors", col_colors.data(), out.nnz, std::cout); } + }; // end namespace raft::sparse::neighbors::detail diff --git a/cpp/test/distance/masked_nn.cu b/cpp/test/distance/masked_nn.cu index 00653f4ced..b7300e34ac 100644 --- a/cpp/test/distance/masked_nn.cu +++ b/cpp/test/distance/masked_nn.cu @@ -272,6 +272,11 @@ auto run_masked_nn(const raft::handle_t& handle, Inputs inp, const Params // Create output auto out = raft::make_device_vector(handle, p.m); + RAFT_LOG_INFO("adj extents %d %d", inp.adj.extent(0), inp.adj.extent(1)); + RAFT_LOG_INFO("group_idxs extents %d", inp.group_idxs.extent(0)); + raft::print_device_vector("adj", inp.adj.data_handle(), inp.adj.extent(0) * inp.adj.extent(1), std::cout); + raft::print_device_vector("group_idxs", inp.group_idxs.data_handle(), inp.group_idxs.extent(0), std::cout); + // Launch kernel raft::distance::masked_l2_nn(handle, masked_l2_params, diff --git a/cpp/test/matrix/gather.cu b/cpp/test/matrix/gather.cu index cab96576d2..d2eb0f3104 100644 --- a/cpp/test/matrix/gather.cu +++ b/cpp/test/matrix/gather.cu @@ -189,6 +189,8 @@ const std::vector> inputs_i32 = const std::vector> inputs_i64 = raft::util::itertools::product>( {25, 2000}, {6, 31, 129}, {11, 999}, {1234ULL}); +const std::vector> inputs_inplace = + raft::util::itertools::product>({25, 2000}, {6, 31, 129}, {11, 999}, {1234ULL}); GATHER_TEST((GatherTest), GatherTestFU32I32, inputs_i32); GATHER_TEST((GatherTest), diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index f50d62cf15..b3b42084d3 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -116,7 +116,7 @@ class ConnectComponentsTest /** * 3. connect_components to fix connectivities */ - raft::linkage::FixConnectivitiesRedOp red_op(params.n_row); + raft::linkage::FixConnectivitiesRedOp red_op(colors.data(), params.n_row); raft::linkage::connect_components(handle, out_edges, data.data(), From f34f0bc66aac0143c080931af41f7513347b083b Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 22 May 2023 15:02:29 -0700 Subject: [PATCH 23/53] cleanup --- cpp/include/raft/matrix/detail/gather.cuh | 1 + .../raft/sparse/neighbors/detail/connect_components.cuh | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index d0892da2f5..ebca374b74 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index a975af8031..2270f5c3e5 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -335,8 +335,9 @@ void perform_1nn(raft::resources const& handle, }); raft::matrix::scatter(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); - thrust::scatter(exec_policy, kvp, kvp + n_rows, sort_plan.data_handle(), kvp); - thrust::scatter(exec_policy, colors, colors + n_rows, sort_plan.data_handle(), colors); + auto it = thrust::make_zip_iterator(thrust::make_tuple(kvp, colors)); + thrust::scatter(exec_policy, it, it + n_rows, sort_plan.data_handle(), it); + // thrust::scatter(exec_policy, colors, colors + n_rows, sort_plan.data_handle(), colors); reduction_op.scatter(handle, sort_plan.data_handle()); LookupColorOp extract_colors_op(colors); From e0b411868ae5e431cd3c4e4537f3d239e627c751 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 22 May 2023 15:26:22 -0700 Subject: [PATCH 24/53] Update copyright --- cpp/include/raft/matrix/detail/gather.cuh | 1 + cpp/include/raft/matrix/detail/scatter.cuh | 6 +++--- cpp/include/raft/matrix/gather.cuh | 2 +- cpp/include/raft/matrix/scatter.cuh | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index ebca374b74..2fcb76df70 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #pragma once #include diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter.cuh index 850abec22e..827ccc40a5 100644 --- a/cpp/include/raft/matrix/detail/scatter.cuh +++ b/cpp/include/raft/matrix/detail/scatter.cuh @@ -1,13 +1,13 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except inout compliance with the License. + * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to inout writing, software + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and diff --git a/cpp/include/raft/matrix/gather.cuh b/cpp/include/raft/matrix/gather.cuh index 069312ce00..95b5e70c55 100644 --- a/cpp/include/raft/matrix/gather.cuh +++ b/cpp/include/raft/matrix/gather.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/matrix/scatter.cuh b/cpp/include/raft/matrix/scatter.cuh index 6b1f5078e0..acf73b39f3 100644 --- a/cpp/include/raft/matrix/scatter.cuh +++ b/cpp/include/raft/matrix/scatter.cuh @@ -1,4 +1,4 @@ -/** +/* * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); From f1b3bf46c55038ad36d4b3702a629099b63177e5 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Wed, 24 May 2023 15:15:20 -0700 Subject: [PATCH 25/53] Working gtest --- cpp/include/raft/matrix/detail/gather.cuh | 75 +++++++++++++---------- cpp/include/raft/matrix/gather.cuh | 25 +++----- cpp/test/matrix/gather.cu | 41 ++++++++----- 3 files changed, 81 insertions(+), 60 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 2fcb76df70..42c74e0b82 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -350,36 +350,24 @@ void gather_if(const InputIteratorT in, gatherImpl(in, D, N, map, stencil, map_length, out, pred_op, transform_op, stream); } -/** - * @brief In-place gather elements in a row-major matrix according to a - * map. The length of the map is equal to the number of rows. The - * map specifies new order in which rows of the input matrix are rearranged, - * i.e. in the resulting matrix, row i is assigned to the position map[i]. - * example, the matrix [[1, 2, 3], [4, 5, 6], [7, 8, 9]] with the - * map [2, 0, 1] will be transformed to [[7, 8, 9], [1, 2, 3], [4, 5, 6]]. - * Batching is done on columns and an additional scratch space of - * shape n_rows * cols_batch_size is created. For each batch, chunks - * of columns from each row are copied into the appropriate location - * in the scratch space and copied back to the corresponding locations - * in the input matrix. - * - * @tparam InputIteratorT - * @tparam MapIteratorT - * @tparam IndexT - * - * @param[in] handle raft handle - * @param[inout] inout input matrix (n_rows * n_cols) - * @param[in] map pointer to the input sequence of gather locations - * @param[in] batch_size column batch size - */ -template -void gather(raft::resources const& handle, - raft::device_matrix_view inout, - raft::device_vector_view map, +template +void gatherInplaceImpl(raft::resources const& handle, + raft::device_matrix_view inout, + raft::device_vector_view map, + MapTransformOp transform_op, IndexT batch_size) { + // return type of MapTransformOp, must be convertible to IndexT + typedef typename std::result_of::type MapTransformOpReturnT; + static_assert((std::is_convertible::value), + "MapTransformOp's result type must be convertible to signed integer"); + IndexT m = inout.extent(0); IndexT n = inout.extent(1); + IndexT map_length = map.extent(0); + + // skip in case of 0 length input + if (map_length <= 0 || m <= 0 || n <= 0) return; auto exec_policy = resource::get_thrust_policy(handle); IndexT n_batches = raft::ceildiv(n, batch_size); @@ -387,22 +375,28 @@ void gather(raft::resources const& handle, IndexT batch_offset = bid * batch_size; IndexT cols_per_batch = min(batch_size, n - batch_offset); auto scratch_space = - raft::make_device_vector(handle, m * cols_per_batch); + raft::make_device_vector(handle, map_length * cols_per_batch); - auto scatter_op = [inout = inout.data_handle(), + auto gather_op = [inout = inout.data_handle(), map = map.data_handle(), + transform_op, batch_offset, + map_length, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__(auto idx) { IndexT row = idx / cols_per_batch; IndexT col = idx % cols_per_batch; - return inout[map[row] * n + batch_offset + col]; + MapT map_val = map[row]; + + IndexT i_src = transform_op(map_val); + return inout[i_src * n + batch_offset + col]; }; - raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); + raft::linalg::map_offset(handle, scratch_space.view(), gather_op); auto copy_op = [inout = inout.data_handle(), map = map.data_handle(), scratch_space = scratch_space.data_handle(), batch_offset, + map_length, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), n] __device__(auto idx) { IndexT row = idx / cols_per_batch; @@ -411,10 +405,29 @@ void gather(raft::resources const& handle, return; }; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(exec_policy, counting, counting + m * cols_per_batch, copy_op); + thrust::for_each(exec_policy, counting, counting + map_length * cols_per_batch, copy_op); } } +template +void gather(raft::resources const& handle, + raft::device_matrix_view inout, + raft::device_vector_view map, + MapTransformOp transform_op, + IndexT batch_size) +{ + gatherInplaceImpl(handle, inout, map, transform_op, batch_size); +} + +template +void gather(raft::resources const& handle, + raft::device_matrix_view inout, + raft::device_vector_view map, + IndexT batch_size) +{ + gatherInplaceImpl(handle, inout, map, raft::identity_op(), batch_size); +} + } // namespace detail } // namespace matrix } // namespace raft diff --git a/cpp/include/raft/matrix/gather.cuh b/cpp/include/raft/matrix/gather.cuh index 95b5e70c55..aa2827a30d 100644 --- a/cpp/include/raft/matrix/gather.cuh +++ b/cpp/include/raft/matrix/gather.cuh @@ -291,10 +291,9 @@ void gather_if(const raft::resources& handle, /** * @brief In-place gather elements in a row-major matrix according to a - * map. The length of the map is equal to the number of rows. The - * map specifies new order in which rows of the input matrix are rearranged, - * i.e. in the resulting matrix, row i is assigned to the position map[i]. - * example, the matrix [[1, 2, 3], [4, 5, 6], [7, 8, 9]] with the + * map. The map specifies new order in which rows of the input matrix are + * rearranged, i.e. in the resulting matrix, row i is assigned to the position + * map[i]. For example, the matrix [[1, 2, 3], [4, 5, 6], [7, 8, 9]] with the * map [2, 0, 1] will be transformed to [[7, 8, 9], [1, 2, 3], [4, 5, 6]]. * Batching is done on columns and an additional scratch space of * shape n_rows * cols_batch_size is created. For each batch, chunks @@ -309,21 +308,17 @@ void gather_if(const raft::resources& handle, * @param[in] handle raft handle * @param[inout] inout input matrix (n_rows * n_cols) * @param[in] map Pointer to the input sequence of gather locations - * @param[in] col_batch_size column batch size + * @param[in] col_batch_size column batch size. Determines the size of the scratch space (map_length, col_batch_size) + * @param[in] transform_op (optional) Transformation to apply to map values */ -template +template void gather(raft::resources const& handle, raft::device_matrix_view inout, - raft::device_vector_view map, - idx_t col_batch_size) + raft::device_vector_view map, + idx_t col_batch_size, + map_xform_t transform_op = raft::identity_op()) { - idx_t m = inout.extent(0); - idx_t n = inout.extent(1); - idx_t map_len = map.extent(0); - RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= n, "col_batch_size should be > 0 and <= n"); - RAFT_EXPECTS(map_len == m, "size of map should be equal to the number of rows in input matrix"); - - detail::gather(handle, inout, map, col_batch_size); + detail::gather(handle, inout, map, transform_op, col_batch_size); } /** @} */ // end of group matrix_gather diff --git a/cpp/test/matrix/gather.cu b/cpp/test/matrix/gather.cu index d2eb0f3104..a0b58598af 100644 --- a/cpp/test/matrix/gather.cu +++ b/cpp/test/matrix/gather.cu @@ -15,6 +15,8 @@ */ #include "../test_utils.cuh" +#include "raft/core/logger-macros.hpp" +#include "raft/util/cudart_utils.hpp" #include #include #include @@ -72,10 +74,11 @@ struct GatherInputs { IdxT nrows; IdxT ncols; IdxT map_length; + IdxT col_batch_size; unsigned long long int seed; }; -template +template class GatherTest : public ::testing::TestWithParam> { protected: GatherTest() @@ -90,7 +93,7 @@ class GatherTest : public ::testing::TestWithParam> { } void SetUp() override - { + { raft::random::RngState r(params.seed); raft::random::RngState r_int(params.seed); @@ -143,6 +146,8 @@ class GatherTest : public ::testing::TestWithParam> { auto in_view = raft::make_device_matrix_view( d_in.data(), params.nrows, params.ncols); + auto inout_view = raft::make_device_matrix_view( + d_in.data(), params.nrows, params.ncols); auto out_view = raft::make_device_matrix_view( d_out_act.data(), map_length, params.ncols); auto map_view = raft::make_device_vector_view(d_map.data(), map_length); @@ -154,12 +159,20 @@ class GatherTest : public ::testing::TestWithParam> { handle, in_view, out_view, map_view, stencil_view, pred_op, transform_op); } else if (Conditional) { raft::matrix::gather_if(handle, in_view, out_view, map_view, stencil_view, pred_op); + } else if (MapTransform && Inplace) { + raft::matrix::gather(handle, inout_view, map_view, params.col_batch_size, transform_op); } else if (MapTransform) { raft::matrix::gather(handle, in_view, map_view, out_view, transform_op); + } else if (Inplace) { + raft::matrix::gather(handle, inout_view, map_view, params.col_batch_size); } else { raft::matrix::gather(handle, in_view, map_view, out_view); } + if (Inplace) { + raft::copy_async(d_out_act.data(), d_in.data(), params.map_length * params.ncols, raft::resource::get_cuda_stream(handle)); + } + resource::sync_stream(handle, stream); } @@ -185,29 +198,29 @@ class GatherTest : public ::testing::TestWithParam> { INSTANTIATE_TEST_CASE_P(GatherTests, test_name, ::testing::ValuesIn(test_inputs)) const std::vector> inputs_i32 = - raft::util::itertools::product>({25, 2000}, {6, 31, 129}, {11, 999}, {1234ULL}); + raft::util::itertools::product>({25, 2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); const std::vector> inputs_i64 = raft::util::itertools::product>( - {25, 2000}, {6, 31, 129}, {11, 999}, {1234ULL}); -const std::vector> inputs_inplace = - raft::util::itertools::product>({25, 2000}, {6, 31, 129}, {11, 999}, {1234ULL}); + {25, 2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); -GATHER_TEST((GatherTest), GatherTestFU32I32, inputs_i32); -GATHER_TEST((GatherTest), +GATHER_TEST((GatherTest), GatherTestFU32I32, inputs_i32); +GATHER_TEST((GatherTest), GatherTransformTestFU32I32, inputs_i32); -GATHER_TEST((GatherTest), GatherIfTestFU32I32, inputs_i32); -GATHER_TEST((GatherTest), +GATHER_TEST((GatherTest), GatherIfTestFU32I32, inputs_i32); +GATHER_TEST((GatherTest), GatherIfTransformTestFU32I32, inputs_i32); -GATHER_TEST((GatherTest), +GATHER_TEST((GatherTest), GatherIfTransformTestDU32I32, inputs_i32); -GATHER_TEST((GatherTest), +GATHER_TEST((GatherTest), GatherIfTransformTestFU32I64, inputs_i64); -GATHER_TEST((GatherTest), +GATHER_TEST((GatherTest), GatherIfTransformTestFI64I64, inputs_i64); - +GATHER_TEST((GatherTest), GatherInplaceTestFU32I32, inputs_i32); +GATHER_TEST((GatherTest), GatherInplaceTestFU32I64, inputs_i64); +GATHER_TEST((GatherTest), GatherInplaceTestFI64I64, inputs_i64); } // end namespace raft \ No newline at end of file From c433d497d14c81cf88467574ce312ccf3f4372a0 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Thu, 25 May 2023 17:56:10 -0700 Subject: [PATCH 26/53] scatter gtest and refactoring --- cpp/include/raft/matrix/detail/gather.cuh | 14 +- cpp/include/raft/matrix/detail/scatter.cuh | 58 ++++++-- cpp/include/raft/matrix/gather.cuh | 17 ++- cpp/include/raft/matrix/scatter.cuh | 30 ++-- cpp/test/CMakeLists.txt | 1 + cpp/test/matrix/gather.cu | 17 ++- cpp/test/matrix/scatter.cu | 160 +++++++++++++++++++++ 7 files changed, 253 insertions(+), 44 deletions(-) create mode 100644 cpp/test/matrix/scatter.cu diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 42c74e0b82..ae2e11ba2f 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -359,7 +359,7 @@ void gatherInplaceImpl(raft::resources const& handle, { // return type of MapTransformOp, must be convertible to IndexT typedef typename std::result_of::type MapTransformOpReturnT; - static_assert((std::is_convertible::value), + RAFT_EXPECTS((std::is_convertible::value), "MapTransformOp's result type must be convertible to signed integer"); IndexT m = inout.extent(0); @@ -367,7 +367,14 @@ void gatherInplaceImpl(raft::resources const& handle, IndexT map_length = map.extent(0); // skip in case of 0 length input - if (map_length <= 0 || m <= 0 || n <= 0) return; + if (map_length <= 0 || m <= 0 || n <= 0 || batch_size < 0) return; + + RAFT_EXPECTS(map_length <= m, "Length of map should be <= number of rows for inplace gather"); + + // re-assign batch_size for default case + if (batch_size == 0) batch_size = n; + + RAFT_EXPECTS(batch_size <= n, "batch size should be <= number of columns"); auto exec_policy = resource::get_thrust_policy(handle); IndexT n_batches = raft::ceildiv(n, batch_size); @@ -392,6 +399,9 @@ void gatherInplaceImpl(raft::resources const& handle, return inout[i_src * n + batch_offset + col]; }; raft::linalg::map_offset(handle, scratch_space.view(), gather_op); + + cudaDeviceSynchronize(); + raft::print_device_vector("gather_scratch_space", scratch_space.data_handle(), m * cols_per_batch, std::cout); auto copy_op = [inout = inout.data_handle(), map = map.data_handle(), scratch_space = scratch_space.data_handle(), diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter.cuh index 827ccc40a5..c78a1aeb8f 100644 --- a/cpp/include/raft/matrix/detail/scatter.cuh +++ b/cpp/include/raft/matrix/detail/scatter.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include "raft/core/resource/cuda_stream.hpp" #include #include #include @@ -49,26 +50,44 @@ namespace detail { * @param[inout] map map containing the destination index for each row (n_rows) * @param[inout] batch_size column batch size */ -template -void scatter(raft::resources const& handle, - raft::device_matrix_view inout, - raft::device_vector_view map, - IndexT batch_size) + + + + + +template +void scatterInplaceImpl(raft::resources const& handle, + raft::device_matrix_view inout, + raft::device_vector_view map, + IndexT batch_size) { + IndexT m = inout.extent(0); IndexT n = inout.extent(1); + IndexT map_length = map.extent(0); + + // skip in case of 0 length input + if (map_length <= 0 || m <= 0 || n <= 0 || batch_size < 0) return; + + RAFT_EXPECTS(map_length == m, "Length of map should be equal to number of rows for inplace scatter"); + + // re-assign batch_size for default case + if (batch_size == 0) batch_size = n; + + RAFT_EXPECTS(batch_size <= n, "batch size should be <= number of columns"); auto exec_policy = resource::get_thrust_policy(handle); IndexT n_batches = raft::ceildiv(n, batch_size); + auto scratch_space = + raft::make_device_vector(handle, m * batch_size); + for (IndexT bid = 0; bid < n_batches; bid++) { IndexT batch_offset = bid * batch_size; IndexT cols_per_batch = min(batch_size, n - batch_offset); - auto scratch_space = - raft::make_device_vector(handle, m * cols_per_batch); - auto scatter_op = [inout = inout.data_handle(), + auto copy_op = [inout = inout.data_handle(), map = map.data_handle(), batch_offset, cols_per_batch = raft::util::FastIntDiv(cols_per_batch), @@ -77,8 +96,11 @@ void scatter(raft::resources const& handle, IndexT col = idx % cols_per_batch; return inout[row * n + batch_offset + col]; }; - raft::linalg::map_offset(handle, scratch_space.view(), scatter_op); - auto copy_op = [inout = inout.data_handle(), + raft::linalg::map_offset(handle, raft::make_device_vector_view(scratch_space.data_handle(), m * cols_per_batch), copy_op); + + cudaDeviceSynchronize(); + raft::print_device_vector("scratch_space", scratch_space.data_handle(), m * cols_per_batch, std::cout); + auto scatter_op = [inout = inout.data_handle(), map = map.data_handle(), scratch_space = scratch_space.data_handle(), batch_offset, @@ -86,14 +108,26 @@ void scatter(raft::resources const& handle, n] __device__(auto idx) { IndexT row = idx / cols_per_batch; IndexT col = idx % cols_per_batch; - inout[map[row] * n + batch_offset + col] = scratch_space[idx]; + IndexT map_val = map[row]; + + inout[map_val * n + batch_offset + col] = scratch_space[idx]; return; }; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(exec_policy, counting, counting + m * cols_per_batch, copy_op); + thrust::for_each(exec_policy, counting, counting + m * cols_per_batch, scatter_op); } } +template +void scatter(raft::resources const& handle, + raft::device_matrix_view inout, + raft::device_vector_view map, + IndexT batch_size) +{ + scatterInplaceImpl(handle, inout, map, batch_size); +} + + } // end namespace detail } // end namespace matrix } // end namespace raft \ No newline at end of file diff --git a/cpp/include/raft/matrix/gather.cuh b/cpp/include/raft/matrix/gather.cuh index aa2827a30d..f00244b752 100644 --- a/cpp/include/raft/matrix/gather.cuh +++ b/cpp/include/raft/matrix/gather.cuh @@ -291,8 +291,9 @@ void gather_if(const raft::resources& handle, /** * @brief In-place gather elements in a row-major matrix according to a - * map. The map specifies new order in which rows of the input matrix are - * rearranged, i.e. in the resulting matrix, row i is assigned to the position + * map. The map specifies the new order in which rows of the input matrix are + * rearranged, i.e. for each output row, read the index in the input matrix + * from the map, apply a transformation to this input index if specified, and copy the row. * map[i]. For example, the matrix [[1, 2, 3], [4, 5, 6], [7, 8, 9]] with the * map [2, 0, 1] will be transformed to [[7, 8, 9], [1, 2, 3], [4, 5, 6]]. * Batching is done on columns and an additional scratch space of @@ -301,21 +302,23 @@ void gather_if(const raft::resources& handle, * in the scratch space and copied back to the corresponding locations * in the input matrix. * - * @tparam matrix_t - * @tparam map_t - * @tparam idx_t + * @tparam matrix_t Matrix element type + * @tparam map_t Integer type of map elements + * @tparam map_xform_t Unary lambda expression or operator type. MapTransformOp's result type must + * be convertible to idx_t. + * @tparam idx_t Integer type used for indexing * * @param[in] handle raft handle * @param[inout] inout input matrix (n_rows * n_cols) * @param[in] map Pointer to the input sequence of gather locations - * @param[in] col_batch_size column batch size. Determines the size of the scratch space (map_length, col_batch_size) +* @param[in] col_batch_size (optional) column batch size. Determines the shape of the scratch space (map_length, col_batch_size). When set to zero (default), no batching is done and an additional scratch space of shape (map_lengthm, n_cols) is created. * @param[in] transform_op (optional) Transformation to apply to map values */ template void gather(raft::resources const& handle, raft::device_matrix_view inout, raft::device_vector_view map, - idx_t col_batch_size, + idx_t col_batch_size = 0, map_xform_t transform_op = raft::identity_op()) { detail::gather(handle, inout, map, transform_op, col_batch_size); diff --git a/cpp/include/raft/matrix/scatter.cuh b/cpp/include/raft/matrix/scatter.cuh index acf73b39f3..5e6850d214 100644 --- a/cpp/include/raft/matrix/scatter.cuh +++ b/cpp/include/raft/matrix/scatter.cuh @@ -23,37 +23,31 @@ namespace raft::matrix { /** * @brief In-place scatter elements in a row-major matrix according to a - * map. The length of the map is equal to the number of rows. The - * map specifies the destination index for each row, i.e. in the - * resulting matrix, row map[i] is assigned to row i. For example, + * map. The map specifies the new order in which rows of the input matrix are + * rearranged, i.e. read the destination index from the map, and copy the row. For example, * the matrix [[1, 2, 3], [4, 5, 6], [7, 8, 9]] with the map [2, 0, 1] will * be transformed to [[4, 5, 6], [7, 8, 9], [1, 2, 3]]. Batching is done on * columns and an additional scratch space of shape n_rows * cols_batch_size * is created. For each batch, chunks of columns from each row are copied * into the appropriate location in the scratch space and copied back to * the corresponding locations in the input matrix. + * Note: in-place scatter is not thread safe if the values in the map are not unique. + * Users must ensure that the map indices are unique and in the range [0, n_rows). * - * @tparam matrix_t - * @tparam map_t - * @tparam idx_t + * @tparam matrix_t Matrix element type + * @tparam idx_t Integer type used for indexing * * @param[in] handle raft handle * @param[inout] inout input matrix (n_rows * n_cols) - * @param[in] map map containing the order in which rows are to be rearranged (n_rows) - * @param[in] col_batch_size column batch size + * @param[in] map Pointer to the input sequence of scatter locations. The length of the map should be equal to the number of rows in the input matrix. Map indices should be unique and in the range [0, n_rows). The map represents a complete permutation of indices. + * @param[in] col_batch_size (optional) column batch size. Determines the shape of the scratch space (n_rows, col_batch_size). When set to zero (default), no batching is done and an additional scratch space of shape (n_rows, n_cols) is created. */ -template +template void scatter(raft::resources const& handle, - raft::device_matrix_view inout, - raft::device_vector_view map, - idx_t col_batch_size) + raft::device_matrix_view inout, + raft::device_vector_view map, + idx_t col_batch_size = 0) { - idx_t m = inout.extent(0); - idx_t n = inout.extent(1); - idx_t map_len = map.extent(0); - RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= n, "col_batch_size should be > 0 and <= n"); - RAFT_EXPECTS(map_len == m, "size of map should be equal to the number of rows in input matrix"); - detail::scatter(handle, inout, map, col_batch_size); } diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 871869102c..cb86db4bce 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -238,6 +238,7 @@ if(BUILD_TESTS) test/matrix/columnSort.cu test/matrix/diagonal.cu test/matrix/gather.cu + test/matrix/scatter.cu test/matrix/linewise_op.cu test/matrix/math.cu test/matrix/matrix.cu diff --git a/cpp/test/matrix/gather.cu b/cpp/test/matrix/gather.cu index a0b58598af..ccbfe0adab 100644 --- a/cpp/test/matrix/gather.cu +++ b/cpp/test/matrix/gather.cu @@ -100,6 +100,8 @@ class GatherTest : public ::testing::TestWithParam> { IdxT map_length = params.map_length; IdxT len = params.nrows * params.ncols; + if (map_length > params.nrows) map_length = params.nrows; + // input matrix setup d_in.resize(params.nrows * params.ncols, stream); h_in.resize(params.nrows * params.ncols); @@ -170,7 +172,7 @@ class GatherTest : public ::testing::TestWithParam> { } if (Inplace) { - raft::copy_async(d_out_act.data(), d_in.data(), params.map_length * params.ncols, raft::resource::get_cuda_stream(handle)); + raft::copy_async(d_out_act.data(), d_in.data(), map_length * params.ncols, raft::resource::get_cuda_stream(handle)); } resource::sync_stream(handle, stream); @@ -192,7 +194,7 @@ class GatherTest : public ::testing::TestWithParam> { { \ ASSERT_TRUE(devArrMatch(d_out_exp.data(), \ d_out_act.data(), \ - params.map_length* params.ncols, \ + d_out_exp.size(), \ raft::Compare())); \ } \ INSTANTIATE_TEST_CASE_P(GatherTests, test_name, ::testing::ValuesIn(test_inputs)) @@ -202,6 +204,11 @@ const std::vector> inputs_i32 = const std::vector> inputs_i64 = raft::util::itertools::product>( {25, 2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); +const std::vector> inplace_inputs_i32 = + raft::util::itertools::product>({2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); +const std::vector> inplace_inputs_i64 = + raft::util::itertools::product>( + {2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); GATHER_TEST((GatherTest), GatherTestFU32I32, inputs_i32); GATHER_TEST((GatherTest), @@ -220,7 +227,7 @@ GATHER_TEST((GatherTest), GATHER_TEST((GatherTest), GatherIfTransformTestFI64I64, inputs_i64); -GATHER_TEST((GatherTest), GatherInplaceTestFU32I32, inputs_i32); -GATHER_TEST((GatherTest), GatherInplaceTestFU32I64, inputs_i64); -GATHER_TEST((GatherTest), GatherInplaceTestFI64I64, inputs_i64); +GATHER_TEST((GatherTest), GatherInplaceTestFU32I32, inplace_inputs_i32); +GATHER_TEST((GatherTest), GatherInplaceTestFU32I64, inplace_inputs_i64); +GATHER_TEST((GatherTest), GatherInplaceTestFI64I64, inplace_inputs_i64); } // end namespace raft \ No newline at end of file diff --git a/cpp/test/matrix/scatter.cu b/cpp/test/matrix/scatter.cu new file mode 100644 index 0000000000..98389ecb6e --- /dev/null +++ b/cpp/test/matrix/scatter.cu @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.cuh" +#include "raft/core/logger-macros.hpp" +#include "raft/util/cudart_utils.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace raft { + +template +void naiveScatter(InputIteratorT in, + IdxT D, + IdxT N, + MapIteratorT map, + IdxT map_length, + OutputIteratorT out) +{ + for (IdxT outRow = 0; outRow < map_length; ++outRow) { + typename std::iterator_traits::value_type map_val = map[outRow]; + IdxT outRowStart = map_val * D; + IdxT inRowStart = outRow * D; + for (IdxT i = 0; i < D; ++i) { + out[outRowStart + i] = in[inRowStart + i]; + } + } +} + +template +struct ScatterInputs { + IdxT nrows; + IdxT ncols; + IdxT col_batch_size; + unsigned long long int seed; +}; + +template +class ScatterTest : public ::testing::TestWithParam> { + protected: + ScatterTest() + : stream(resource::get_cuda_stream(handle)), + params(::testing::TestWithParam>::GetParam()), + d_in(0, stream), + d_out_exp(0, stream), + d_map(0, stream) + { + } + + void SetUp() override + { + raft::random::RngState r(params.seed); + raft::random::RngState r_int(params.seed); + + IdxT len = params.nrows * params.ncols; + + // input matrix setup + d_in.resize(params.nrows * params.ncols, stream); + h_in.resize(params.nrows * params.ncols); + raft::random::uniform(handle, r, d_in.data(), len, MatrixT(-1.0), MatrixT(1.0)); + raft::update_host(h_in.data(), d_in.data(), len, stream); + + // map setup + d_map.resize(params.nrows, stream); + h_map.resize(params.nrows); + + auto exec_policy = raft::resource::get_thrust_policy(handle); + + thrust::counting_iterator permute_iter(0); + thrust::copy(exec_policy, permute_iter, permute_iter + params.nrows, d_map.data()); + + thrust::default_random_engine g; + thrust::shuffle(exec_policy, d_map.data(), d_map.data() + params.nrows, g); + + raft::update_host(h_map.data(), d_map.data(), params.nrows, stream); + resource::sync_stream(handle, stream); + + // expected and actual output matrix setup + h_out.resize(params.nrows * params.ncols); + d_out_exp.resize(params.nrows * params.ncols, stream); + + // launch scatter on the host and copy the results to device + naiveScatter(h_in.data(), + params.ncols, + params.nrows, + h_map.data(), + params.nrows, + h_out.data()); + raft::update_device(d_out_exp.data(), h_out.data(), params.nrows * params.ncols, stream); + + auto inout_view = raft::make_device_matrix_view( + d_in.data(), params.nrows, params.ncols); + auto map_view = raft::make_device_vector_view(d_map.data(), params.nrows); + + raft::matrix::scatter(handle, inout_view, map_view, params.col_batch_size); + resource::sync_stream(handle, stream); + + raft::print_device_vector("map", d_map.data(), params.nrows, std::cout); + raft::print_device_vector("d_out_exp", d_out_exp.data(), params.nrows * params.ncols, std::cout); + raft::print_device_vector("d_scatter", d_in.data(), params.nrows * params.ncols, std::cout); + } + + protected: + raft::resources handle; + cudaStream_t stream = 0; + ScatterInputs params; + std::vector h_in, h_out; + std::vector h_map; + rmm::device_uvector d_in, d_out_exp; + rmm::device_uvector d_map; +}; + +#define SCATTER_TEST(test_type, test_name, test_inputs) \ + typedef RAFT_DEPAREN(test_type) test_name; \ + TEST_P(test_name, Result) \ + { \ + ASSERT_TRUE(devArrMatch(d_in.data(), \ + d_out_exp.data(), \ + d_out_exp.size(), \ + raft::Compare())); \ + } \ + INSTANTIATE_TEST_CASE_P(ScatterTests, test_name, ::testing::ValuesIn(test_inputs)) + +const std::vector> inputs_i32 = + raft::util::itertools::product>({2000}, {6, 31, 129}, {2, 3, 6}, {1234ULL}); +const std::vector> inputs_i64 = + raft::util::itertools::product>( + {2000}, {6, 31, 129}, {2, 3, 6}, {1234ULL}); + +SCATTER_TEST((ScatterTest), ScatterTestFI32, inputs_i32); +SCATTER_TEST((ScatterTest), ScatterTestFI64, inputs_i64); +} // end namespace raft \ No newline at end of file From 81ca1fb564ce477b735f2eec360cfe06985aeb76 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 2 Jun 2023 17:51:31 -0700 Subject: [PATCH 27/53] bug free --- cpp/include/raft/matrix/detail/gather.cuh | 2 - cpp/include/raft/matrix/detail/scatter.cuh | 2 - .../neighbors/detail/connect_components.cuh | 77 +++----- cpp/test/distance/masked_nn.cu | 5 - cpp/test/matrix/scatter.cu | 7 +- .../sparse/neighbors/connect_components.cu | 185 ++++++++++++++++++ 6 files changed, 213 insertions(+), 65 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index ae2e11ba2f..84a6b493d2 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -400,8 +400,6 @@ void gatherInplaceImpl(raft::resources const& handle, }; raft::linalg::map_offset(handle, scratch_space.view(), gather_op); - cudaDeviceSynchronize(); - raft::print_device_vector("gather_scratch_space", scratch_space.data_handle(), m * cols_per_batch, std::cout); auto copy_op = [inout = inout.data_handle(), map = map.data_handle(), scratch_space = scratch_space.data_handle(), diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter.cuh index c78a1aeb8f..b0f5bc98cf 100644 --- a/cpp/include/raft/matrix/detail/scatter.cuh +++ b/cpp/include/raft/matrix/detail/scatter.cuh @@ -98,8 +98,6 @@ void scatterInplaceImpl(raft::resources const& handle, }; raft::linalg::map_offset(handle, raft::make_device_vector_view(scratch_space.data_handle(), m * cols_per_batch), copy_op); - cudaDeviceSynchronize(); - raft::print_device_vector("scratch_space", scratch_space.data_handle(), m * cols_per_batch, std::cout); auto scatter_op = [inout = inout.data_handle(), map = map.data_handle(), scratch_space = scratch_space.data_handle(), diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 2270f5c3e5..afcc981d3e 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -110,13 +111,17 @@ struct FixConnectivitiesRedOp { void gather(const raft::resources& handle, value_idx* map) { - thrust::gather(raft::resource::get_thrust_policy(handle), map, map + m, colors, colors); + auto tmp_colors = raft::make_device_vector(handle, m); + thrust::gather(raft::resource::get_thrust_policy(handle), map, map + m, colors, tmp_colors.data_handle()); + raft::copy_async(colors, tmp_colors.data_handle(), m, raft::resource::get_cuda_stream(handle)); } void scatter(const raft::resources& handle, value_idx* map) { + auto tmp_colors = raft::make_device_vector(handle, m); thrust::scatter( - raft::resource::get_thrust_policy(handle), colors, colors + m, map, colors); + raft::resource::get_thrust_policy(handle), colors, colors + m, map, tmp_colors.data_handle()); + raft::copy_async(colors, tmp_colors.data_handle(), m, raft::resource::get_cuda_stream(handle)); } }; @@ -258,16 +263,13 @@ void perform_1nn(raft::resources const& handle, raft::sparse::convert::sorted_coo_to_csr( colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); - raft::print_device_vector("colors", colors, n_rows, std::cout); - raft::print_device_vector("color_group_idxs", colors_group_idxs.data_handle(), n_components + 1, std::cout); - auto group_idxs_view = - raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components); + raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components); auto x_norm = raft::make_device_vector(handle, (value_idx)n_rows); raft::linalg::rowNorm( x_norm.data_handle(), X, n_cols, n_rows, raft::linalg::L2Norm, true, stream); - + auto adj = raft::make_device_matrix(handle, row_batch_size, n_components); using OutT = raft::KeyValuePair; using ParamT = raft::distance::masked_l2_nn_params; @@ -280,14 +282,17 @@ void perform_1nn(raft::resources const& handle, size_t n_batches = raft::ceildiv(n_rows, row_batch_size); + RAFT_LOG_INFO("row_batch_size %zu, col_batch_size %zu, n_batches %zu", row_batch_size, col_batch_size, n_batches); + for (size_t bid = 0; bid < n_batches; bid++) { size_t batch_offset = bid * row_batch_size; size_t rows_per_batch = min(row_batch_size, n_rows - batch_offset); + RAFT_LOG_INFO("rows_per_batch %zu, batch_offset %zu", rows_per_batch, batch_offset); auto X_batch_view = raft::make_device_matrix_view( X + batch_offset * n_cols, rows_per_batch, n_cols); - auto x_norm_batch_view = raft::make_device_vector_view( + auto x_norm_batch_view = raft::make_device_vector_view( x_norm.data_handle() + batch_offset, rows_per_batch); auto mask_op = [colors, @@ -303,11 +308,9 @@ void perform_1nn(raft::resources const& handle, raft::linalg::map_offset(handle, adj_vector_view, mask_op); - auto adj_view = raft::make_device_matrix_view( + auto adj_view = raft::make_device_matrix_view( adj.data_handle(), rows_per_batch, n_components); - raft::print_device_vector("adj", adj.data_handle(), rows_per_batch * n_components, std::cout); - auto kvp_view = raft::make_device_vector_view, value_idx>( kvp + batch_offset, rows_per_batch); @@ -321,6 +324,7 @@ void perform_1nn(raft::resources const& handle, adj_view, group_idxs_view, kvp_view); + } thrust::transform(exec_policy, @@ -335,11 +339,20 @@ void perform_1nn(raft::resources const& handle, }); raft::matrix::scatter(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); - auto it = thrust::make_zip_iterator(thrust::make_tuple(kvp, colors)); - thrust::scatter(exec_policy, it, it + n_rows, sort_plan.data_handle(), it); - // thrust::scatter(exec_policy, colors, colors + n_rows, sort_plan.data_handle(), colors); + + auto tmp_colors = raft::make_device_vector(handle, n_rows); + auto tmp_kvp = raft::make_device_vector (handle, n_rows); + + thrust::scatter(exec_policy, kvp, kvp + n_rows, sort_plan.data_handle(), tmp_kvp.data_handle()); + thrust::scatter(exec_policy, colors, colors + n_rows, sort_plan.data_handle(), tmp_colors.data_handle()); reduction_op.scatter(handle, sort_plan.data_handle()); + raft::copy_async(colors, tmp_colors.data_handle(), n_rows, stream); + raft::copy_async(kvp, tmp_kvp.data_handle(), n_rows, stream); + + auto keys = raft::make_device_vector(handle, n_rows); + raft::linalg::map_offset(handle, keys.view(), [kvp]__device__(auto idx) { return kvp[idx].key; }); + LookupColorOp extract_colors_op(colors); thrust::transform(exec_policy, kvp, kvp + n_rows, nn_colors, extract_colors_op); } @@ -480,8 +493,6 @@ void connect_components(raft::resources const& handle, raft::label::make_monotonic( colors.data(), const_cast(orig_colors), n_rows, stream, zero_based); - raft::print_device_vector("orig_colors", orig_colors, n_rows, std::cout); - /** * First compute 1-nn for all colors where the color of each data point * is guaranteed to be != color of its nearest neighbor. @@ -535,45 +546,11 @@ void connect_components(raft::resources const& handle, min_components_by_color( min_edges, out_index.data(), src_indices.data(), temp_inds_dists.data(), n_rows, stream); - raft::sparse::op::coo_sort(n_rows, - n_rows, - min_edges.nnz, - min_edges.rows(), - min_edges.cols(), - min_edges.vals(), - stream); - - rmm::device_uvector min_edges_row_colors(min_edges.nnz, stream); - rmm::device_uvector min_edges_col_colors(min_edges.nnz, stream); - - thrust::transform(rmm::exec_policy(stream), min_edges.rows(), min_edges.rows() + min_edges.nnz, min_edges_row_colors.data(), [orig_colors]__device__(auto idx) {return orig_colors[idx];}); - thrust::transform(rmm::exec_policy(stream), min_edges.cols(), min_edges.cols() + min_edges.nnz, min_edges_col_colors.data(), [orig_colors]__device__(auto idx) {return orig_colors[idx];}); - - - raft::print_device_vector("mnn_min_edges_rows", min_edges.rows(), min_edges.nnz, std::cout); - raft::print_device_vector("mnn_min_edges_cols", min_edges.cols(), min_edges.nnz, std::cout); - raft::print_device_vector("mnn_min_edges_vals", min_edges.vals(), min_edges.nnz, std::cout); - raft::print_device_vector("row_colors", min_edges_row_colors.data(), min_edges.nnz, std::cout); - raft::print_device_vector("col_colors", min_edges_col_colors.data(), min_edges.nnz, std::cout); - /** * Symmetrize resulting edge list */ raft::sparse::linalg::symmetrize( handle, min_edges.rows(), min_edges.cols(), min_edges.vals(), n_rows, n_rows, size, out); - - rmm::device_uvector row_colors(out.nnz, stream); - rmm::device_uvector col_colors(out.nnz, stream); - - thrust::transform(rmm::exec_policy(stream), out.rows(), out.rows() + out.nnz, row_colors.data(), [orig_colors]__device__(auto idx) {return orig_colors[idx];}); - thrust::transform(rmm::exec_policy(stream), out.cols(), out.cols() + out.nnz, col_colors.data(), [orig_colors]__device__(auto idx) {return orig_colors[idx];}); - - - raft::print_device_vector("mnn_out_rows", out.rows(), out.nnz, std::cout); - raft::print_device_vector("mnn_out_cols", out.cols(), out.nnz, std::cout); - raft::print_device_vector("mnn_out_vals", out.vals(), out.nnz, std::cout); - raft::print_device_vector("row_colors", row_colors.data(), out.nnz, std::cout); - raft::print_device_vector("col_colors", col_colors.data(), out.nnz, std::cout); } diff --git a/cpp/test/distance/masked_nn.cu b/cpp/test/distance/masked_nn.cu index b7300e34ac..00653f4ced 100644 --- a/cpp/test/distance/masked_nn.cu +++ b/cpp/test/distance/masked_nn.cu @@ -272,11 +272,6 @@ auto run_masked_nn(const raft::handle_t& handle, Inputs inp, const Params // Create output auto out = raft::make_device_vector(handle, p.m); - RAFT_LOG_INFO("adj extents %d %d", inp.adj.extent(0), inp.adj.extent(1)); - RAFT_LOG_INFO("group_idxs extents %d", inp.group_idxs.extent(0)); - raft::print_device_vector("adj", inp.adj.data_handle(), inp.adj.extent(0) * inp.adj.extent(1), std::cout); - raft::print_device_vector("group_idxs", inp.group_idxs.data_handle(), inp.group_idxs.extent(0), std::cout); - // Launch kernel raft::distance::masked_l2_nn(handle, masked_l2_params, diff --git a/cpp/test/matrix/scatter.cu b/cpp/test/matrix/scatter.cu index 98389ecb6e..0824633323 100644 --- a/cpp/test/matrix/scatter.cu +++ b/cpp/test/matrix/scatter.cu @@ -15,8 +15,7 @@ */ #include "../test_utils.cuh" -#include "raft/core/logger-macros.hpp" -#include "raft/util/cudart_utils.hpp" +#include #include #include #include @@ -122,10 +121,6 @@ class ScatterTest : public ::testing::TestWithParam> { raft::matrix::scatter(handle, inout_view, map_view, params.col_batch_size); resource::sync_stream(handle, stream); - - raft::print_device_vector("map", d_map.data(), params.nrows, std::cout); - raft::print_device_vector("d_out_exp", d_out_exp.data(), params.nrows * params.ncols, std::cout); - raft::print_device_vector("d_scatter", d_in.data(), params.nrows * params.ncols, std::cout); } protected: diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index b3b42084d3..1a9a99f5eb 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -360,5 +360,190 @@ TEST_P(ConnectComponentsTestF_Int, Result) INSTANTIATE_TEST_CASE_P(ConnectComponentsTest, ConnectComponentsTestF_Int, ::testing::ValuesIn(fix_conn_inputsf2)); + + +template + struct MutualReachabilityFixConnectivitiesRedOp { + value_t* core_dists; + value_idx m; + + DI MutualReachabilityFixConnectivitiesRedOp() : m(0) {} + + MutualReachabilityFixConnectivitiesRedOp(value_t* core_dists_, value_idx m_) + : core_dists(core_dists_), m(m_) {}; + + typedef typename raft::KeyValuePair KVP; + DI void operator()(value_idx rit, KVP* out, const KVP& other) const + { + if (rit < m && other.value < std::numeric_limits::max()) { + value_t core_dist_rit = core_dists[rit]; + value_t core_dist_other = max(core_dist_rit, max(core_dists[other.key], other.value)); + + value_t core_dist_out; + if (out->key > -1) { + core_dist_out = max(core_dist_rit, max(core_dists[out->key], out->value)); + } else { + core_dist_out = out->value; + } + + bool smaller = core_dist_other < core_dist_out; + out->key = smaller ? other.key : out->key; + out->value = smaller ? core_dist_other : core_dist_out; + } + } + + DI KVP operator()(value_idx rit, const KVP& a, const KVP& b) const + { + if (rit < m && a.key > -1) { + value_t core_dist_rit = core_dists[rit]; + value_t core_dist_a = max(core_dist_rit, max(core_dists[a.key], a.value)); + + value_t core_dist_b; + if (b.key > -1) { + core_dist_b = max(core_dist_rit, max(core_dists[b.key], b.value)); + } else { + core_dist_b = b.value; + } + + return core_dist_a < core_dist_b ? KVP(a.key, core_dist_a) : KVP(b.key, core_dist_b); + } + + return b; + } + + DI void init(value_t* out, value_t maxVal) const { *out = maxVal; } + DI void init(KVP* out, value_t maxVal) const + { + out->key = -1; + out->value = maxVal; + } + + DI void init_key(value_t& out, value_idx idx) const { return; } + DI void init_key(KVP& out, value_idx idx) const { out.key = idx; } + + DI value_t get_value(KVP& out) const { return out.value; } + DI value_t get_value(value_t& out) const { return out; } + + void gather(const raft::resources& handle, value_idx* map) + { + auto tmp_core_dists = raft::make_device_vector(handle, m); + thrust::gather(raft::resource::get_thrust_policy(handle), map, map + m, core_dists, tmp_core_dists.data_handle()); + raft::copy_async(core_dists, tmp_core_dists.data_handle(), m, raft::resource::get_cuda_stream(handle)); + } + + void scatter(const raft::resources& handle, value_idx* map) + { + auto tmp_core_dists = raft::make_device_vector(handle, m); + thrust::scatter( + raft::resource::get_thrust_policy(handle), core_dists, core_dists + m, map, tmp_core_dists.data_handle()); + raft::copy_async(core_dists, tmp_core_dists.data_handle(), m, raft::resource::get_cuda_stream(handle)); + } +}; + +template +struct ConnectComponentsMutualReachabilityInputs { + value_idx n_row; + value_idx n_col; + std::vector data; + std::vector core_dists; + std::vector colors; + std::vector expected_rows; + std::vector expected_cols; + std::vector expected_vals; +}; + +template +class ConnectComponentsEdgesTest + : public ::testing::TestWithParam> { + protected: + void basicTest() + { + raft::resources handle; + + auto stream = resource::get_cuda_stream(handle); + + params = ::testing::TestWithParam>::GetParam(); + + raft::sparse::COO out_edges(resource::get_cuda_stream(handle)); + + rmm::device_uvector data(params.n_row * params.n_col, + resource::get_cuda_stream(handle)); + rmm::device_uvector core_dists(params.n_row, + resource::get_cuda_stream(handle)); + rmm::device_uvector colors(params.n_row, + resource::get_cuda_stream(handle)); + + raft::copy(data.data(), params.data.data(), data.size(), resource::get_cuda_stream(handle)); + raft::copy(core_dists.data(), params.core_dists.data(), core_dists.size(), resource::get_cuda_stream(handle)); + raft::copy(colors.data(), params.colors.data(), colors.size(), resource::get_cuda_stream(handle)); + + /** + * 3. connect_components to fix connectivities + */ + MutualReachabilityFixConnectivitiesRedOp red_op(core_dists.data(), params.n_row); + + raft::linkage::connect_components(handle, + out_edges, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + params.n_row, + params.n_col); + + ASSERT_TRUE(devArrMatch(out_edges.rows(), + params.expected_rows.data(), + out_edges.nnz, + Compare())); + + ASSERT_TRUE(devArrMatch(out_edges.cols(), + params.expected_cols.data(), + out_edges.nnz, + Compare())); + + ASSERT_TRUE(devArrMatch(out_edges.vals(), + params.expected_vals.data(), + out_edges.nnz, + CompareApprox(1e-4))); + } + + void SetUp() override { basicTest(); } + + void TearDown() override {} + + protected: + ConnectComponentsMutualReachabilityInputs params; +}; + +const std::vector> mr_fix_conn_inputsf2 = { + {100, + 2, + {-7.72642,-8.39496,5.4534,0.742305,-2.97867,9.55685,6.04267,0.571319,-6.52184,-6.31932,3.64934,1.40687,-2.17793,9.98983,4.42021,2.33028,4.73696,2.94181,-3.66019,9.38998,-3.05358,9.12521,-6.65217,-5.57297,-6.35769,-6.58313,-3.61553,7.81808,-1.77073,9.18565,-7.95052,-6.39764,-6.60294,-6.05293,-2.58121,10.0178,-7.76348,-6.72638,-6.40639,-6.95294,-2.97262,8.54856,-6.95673,-6.53896,-7.32614,-6.02371,-2.1478,10.5523,-2.54502,10.5789,-2.96984,10.0714,3.22451,1.55252,-6.25396,-7.73727,-7.85431,-6.09303,-8.11658,-8.20057,-7.55965,-6.64786,4.936,2.23423,4.44752,2.27472,-5.72103,-7.70079,-0.929985,9.78172,-3.10984,8.72259,-2.44167,7.58954,-2.18511,8.6292,5.55528,2.30192,4.73164,-0.0143992,-8.2573,-7.81793,-2.98837,8.82863,4.60517,0.804492,-3.83738,9.21115,-2.62485,8.71318,3.57758,2.44676,-8.48711,-6.69548,-6.70645,-6.49479,-6.86663,-5.42658,3.83139,1.47141,2.02013,2.79507,4.64499,1.73858,-1.69667,10.3705,-6.61974,-6.09829,-6.05757,-4.98332,-7.10309,-6.16611,-3.52203,9.32853,-2.26724,7.10101,6.11777,1.4549,-4.23412,8.452,-6.58655,-7.59446,3.93783,1.64551,-7.12502,-7.63385,2.72111,1.94666,-7.14428,-4.15994,-6.66553,-8.12585,4.70011,4.43641,-7.76914,-7.69592,4.11012,2.48644,4.89743,1.89872,4.29716,1.17089,-6.62913,-6.53366,-8.07093,-6.22356,-2.16558,7.25125,4.73953,1.46969,-5.91625,-6.46733,5.43091,1.06378,-6.82142,-8.02308,6.52606,2.14775,3.08922,2.04173,-2.14756,8.36917,3.85663,1.65111,-1.68665,7.79344,-5.01385,-6.40628,-2.52269,7.95658,-2.30033,7.05462,-1.04355,8.78851,3.72045,3.5231,-3.98772,8.29444,4.24777,0.509655,4.72693,1.67416,5.7827,2.7251,-3.41722,7.60198,5.22674,4.16363,-3.1109,10.8666,-3.18612,9.62596,-1.4782,9.94557,4.47859,2.37722,-5.79658,-5.82631,-3.34842,8.70507}, + {0.978428,1.01917,0.608673,1.45629,0.310713,0.689461,0.701126,0.63296,0.774788,0.701648,0.513282,0.757651,0.45638,0.973111,0.901396,0.613692,0.482497,0.688143,0.72428,0.666345,0.58232,0.554756,0.710315,0.903611,0.694115,0.796099,0.639759,0.798998,0.639839,1.30727,0.663729,0.57476,0.571348,1.14662,1.26518,0.485068,0.78207,0.791621,1.01678,1.28509,1.14715,0.381395,0.850507,0.788511,0.588341,0.878516,0.928669,0.405874,0.776421,0.612274,1.84963,0.57476,0.95226,0.488078,1.24868,0.515136,0.589378,0.903632,1.01678,1.09964,0.666345,0.713265,0.877168,1.10053,1.96887,1.03574,2.03728,0.969553,0.774788,0.586338,0.65168,0.435472,0.664396,0.790584,0.678637,0.715964,0.865494,0.978428,1.59242,0.861109,0.833259,0.65168,0.903632,1.49599,0.76347,0.960453,1.1848,1.37398,0.928957,1.07848,0.661798,1.21104,1.04579,1.89047,1.24288,0.529553,0.903611,0.620897,0.882467,0.647189}, + {0,1,2,1,0,1,2,1,1,2,2,0,0,2,2,0,0,2,0,0,2,0,0,2,2,2,1,0,0,0,0,1,1,0,2,2,2,2,1,1,0,2,1,2,2,1,0,0,0,1,1,1,2,0,0,0,2,2,1,2,0,1,0,1,0,0,1,0,1,1,1,0,0,2,1,0,1,0,1,1,2,1,2,0,2,2,2,1,2,1,1,1,2,1,2,2,2,1,0,2}, + {50,54,57,63,82,87}, + {57,63,50,54,87,82}, + {6.0764,11.1843,6.0764,11.1843,6.89004,6.89004}}, + {1000, + 2, + {-6.59634,-7.13901,-6.13753,-6.58082,5.19821,2.04918,-2.96856,8.16444,-2.76879,7.51114,-6.82261,-6.61152,5.02008,2.58376,5.55621,2.31966,4.86379,3.33731,5.84639,1.15623,-2.17159,8.60241,-4.97844,-6.94077,-2.31014,8.41407,5.5582,0.402669,5.25265,0.919754,5.85298,2.11489,-3.29245,8.69222,-1.9621,8.81209,-1.53408,8.86723,-2.18227,8.79519,4.60519,2.20738,-6.4759,-6.9043,-7.18766,-6.10045,-9.00148,-7.48793,4.01674,1.41769,-2.45347,10.1085,-3.20892,9.22827,-3.18612,9.62596,4.81977,3.36517,4.90693,2.8628,-6.44269,-5.68946,-8.30144,-5.37878,4.61485,2.79094,-1.98726,9.31127,-3.66019,9.38998,-6.58607,-8.23669,-7.46015,-6.29153,4.08468,3.85433,-6.36842,-5.50645,-6.83602,-5.18506,-0.627173,10.3597,3.98846,1.48928,-2.9968,8.58173,-7.2144,-7.28376,-0.660242,10.1409,-4.23528,-8.38308,-3.15984,8.52716,-2.40987,9.76567,-8.7548,-6.76508,4.56971,0.312209,-7.5487,-5.8402,-1.6096,9.32159,5.04813,0.270586,-7.6525,-6.47306,-1.79758,7.88964,-9.0153,-3.74236,-3.5715,9.48788,-1.65154,8.85435,-3.47412,9.70034,6.31245,2.39219,4.03851,2.29295,-3.17098,9.86672,-6.90693,-7.81338,-6.22373,-6.68537,-3.22204,9.12072,-0.365254,9.6482,-7.76712,-7.31757,4.15669,3.54716,4.1937,0.083629,-3.03896,9.52755,-6.29293,-7.35501,-2.95926,9.63714,4.02709,1.58547,4.56828,1.93595,5.6242,1.75918,-7.36237,-7.83344,5.32177,3.81988,-2.43183,8.153,-1.97939,10.4559,-3.49492,9.51833,3.39602,1.28026,-2.42215,8.71528,-3.57682,8.87191,-2.77385,11.7345,5.71351,0.946654,-6.50253,-6.90937,4.08239,0.603367,-5.64134,-6.85884,-2.76177,7.7665,-2.25165,8.93984,-3.49071,9.47639,-1.06792,7.57842,5.15754,1.24743,3.63574,1.20537,-6.07969,-8.49642,4.12227,2.19696,-7.17144,-8.4433,-1.92234,11.2047,3.23237,1.19535,3.85389,0.641937,4.82665,1.21779,-7.68923,-6.45605,-7.00816,-8.76196,-5.12894,9.83619,-5.66247,-5.35879,3.05598,2.73358,6.06038,1.40242,-1.69568,7.78342,5.13391,2.23384,-2.96984,10.0714,-5.36618,-6.2493,5.55896,1.6829,3.55882,2.58911,5.36155,0.844118,-0.0634456,9.14351,4.88368,1.40909,-7.04675,-6.59753,-7.78333,-6.55575,5.39881,2.25436,-2.85189,8.64285,-2.22821,8.39159,3.88591,1.69249,-7.55481,-7.02463,4.60032,2.65467,-6.90615,-7.76198,-6.76005,-7.85318,4.15044,3.01733,-7.18884,-7.63227,4.68874,2.01376,3.51716,2.35558,-3.81367,9.68396,4.42644,3.4639,4.81758,0.637825,-6.20705,-4.98023,-1.68603,9.0876,-4.99504,-5.33687,-1.77073,9.18565,4.86433,3.02027,4.20538,1.664,4.59042,2.64799,-3.09856,9.86389,-3.02306,7.95507,-6.32402,-6.79053,-7.67205,-7.18807,-8.10918,-6.38341,-1.67979,6.80315,4.00249,3.16219,-2.54391,7.84561,-3.22764,8.80084,-2.63712,8.05875,-2.41744,7.02672,-6.71117,-5.56251,5.18348,1.60256,-7.40824,-6.29375,-4.22233,10.3682,4.8509,1.87646,-2.99456,9.09616,5.1332,2.15801,-2.27358,9.78515,-6.73874,-8.64855,4.96124,2.39509,-3.70949,8.67978,-4.13674,9.06237,2.80367,2.48116,-0.876786,7.58414,-3.7005,9.67084,6.48652,0.903085,6.28189,2.98299,-6.07922,-6.12582,-5.67921,-7.537,4.55014,3.41329,-1.63688,9.19763,-4.02439,10.3812,5.23053,3.08187,-2.2951,7.76855,-6.24491,-5.77041,6.02415,2.53708,-6.91286,-7.08823,4.83193,1.66405,-7.07454,-5.74634,-2.09576,10.8911,3.29543,1.05452,-3.49973,8.44799,5.2922,0.396778,-2.54502,10.5789,-6.38865,-6.14523,-1.75221,8.09212,-9.30387,-5.99606,-2.98113,10.1032,-6.2017,-7.36802,4.63628,0.814805,-1.81905,8.61307,4.88926,3.55062,3.08325,2.57918,-2.51717,10.4942,-5.75358,-6.9315,6.36742,2.40949,5.74806,0.933264,4.74408,1.91058,-7.41496,-6.97064,-2.98414,8.36096,6.72825,1.83358,-2.95349,9.39159,-3.35599,7.49944,6.18738,3.76905,-3.17182,9.58488,5.17863,1.0525,-3.0397,8.43847,-2.23874,8.96405,3.04689,2.41364,6.14064,2.82339,-6.33334,-6.87369,-7.92444,-8.84647,3.65129,0.86958,5.29842,3.98337,-2.06538,9.78892,-6.89494,-6.30082,-2.52144,8.11703,-8.11398,-7.47257,5.3381,2.36666,-6.93452,-6.59456,-7.50634,-6.01772,6.23438,1.12621,-2.15218,8.32138,-7.04777,-7.3522,-2.52771,8.72563,-2.77907,8.03552,4.29123,1.62391,-8.07551,-6.43551,-3.28202,8.77747,-2.21308,9.27534,-8.25153,-8.49367,-3.54644,8.82395,-8.05867,-5.69243,4.46681,1.98875,3.8362,3.61229,-6.96231,-7.00186,5.18993,1.00483,-5.35116,-6.37227,5.23298,1.66362,-5.68306,-7.03864,-9.03144,-7.59926,-6.10127,-7.4313,4.83572,0.994797,-7.32695,-5.59909,0.569683,10.1339,3.35957,2.84563,-2.4122,9.60944,5.00855,1.57983,-2.57528,7.80327,3.96349,3.77411,4.59429,2.21651,-6.54765,-6.68961,4.76798,1.29212,-1.67351,7.88458,5.63615,1.47941,-2.5301,9.13161,4.26075,1.76959,4.67788,2.0932,4.39955,1.59835,3.91274,1.72565,-4.1786,9.55765,-7.34566,-8.47481,4.8364,2.68217,-7.36848,-7.99973,-5.84708,-5.7534,5.37252,1.89245,-2.1707,8.599,-1.3299,9.0818,-6.79122,-5.40258,5.56391,1.78827,-0.194539,7.14702,4.60489,3.74397,5.50995,2.46885,-3.98772,8.29444,-5.21837,-7.33721,-1.63959,10.3699,-5.92932,-5.1695,-5.88358,-7.6369,4.11716,3.02218,-6.54114,-7.17551,3.97179,2.96521,-6.75325,-4.94118,5.26169,0.402945,3.25031,0.327771,-0.44845,10.7696,-2.15141,9.57507,7.04329,1.91555,-3.74615,7.69383,-7.52318,-5.85015,-6.80419,-8.48208,-4.57664,8.92517,4.57574,2.30193,4.84098,3.02382,-9.43355,-5.94579,-3.52203,9.32853,3.43018,2.5731,-6.15725,-7.25294,-6.69861,-8.17694,-2.40955,8.51081,-4.82342,-7.98332,-7.10611,-6.51274,5.86755,0.763529,-6.56045,-5.53966,-3.61553,7.81808,4.3825,0.304586,-6.52818,-5.80996,4.59972,0.542395,-6.90603,-6.59995,-6.3585,-6.23489,-6.01915,-7.46319,-5.38694,-7.15123,-7.83475,-6.45651,5.89564,1.07856,-5.15266,-7.27975,-6.97978,-7.08378,5.83493,0.449983,-2.62374,10.2521,-7.34494,-6.98606,-6.79719,-8.33766,3.54757,1.65676,-8.40528,-5.61753,-5.85556,-6.28758,4.66862,3.25162,-6.26047,-4.82261,4.61552,4.11544,-1.36637,9.76622,4.2517,2.14359,-2.45099,7.87132,-0.376164,7.0622,4.34493,3.22091,6.95921,2.36649,-6.70319,-7.24714,-5.56932,-5.48443,-7.43149,-4.32191,-3.23956,9.23074,-5.77255,-7.00049,4.96601,0.722056,-7.88617,-5.74023,4.18757,-0.45071,-7.12569,-7.72336,5.27366,2.38697,3.93487,1.9174,3.19186,-0.225636,-3.41722,7.60198,-3.08286,8.46743,-5.87905,-7.55073,-5.26425,-7.20243,-2.97867,9.55685,-1.23153,8.42272,-2.33602,9.3996,-3.33819,8.45411,-3.58009,9.49676,3.78152,2.67348,-1.54582,9.42707,-4.04331,10.292,3.3452,3.134,-2.75494,8.74156,-3.26555,7.59203,-7.27139,-7.80252,3.5293,3.72544,6.11642,3.35326,4.01611,3.8872,4.89591,2.95586,-7.06677,-5.89438,4.19438,3.42655,-6.11355,-5.65318,-7.59645,-8.74665,-5.80362,-6.8588,3.80453,4.11832,5.70655,3.14247,-4.98084,8.21739,-1.87642,11.285,4.39864,2.32523,-3.48388,9.80137,4.02836,0.566509,-2.41212,9.98293,-5.40846,-7.08943,4.01506,1.99926,-3.43613,8.95476,-7.24458,-7.71932,6.02204,2.62188,-6.29999,-6.55431,6.19038,0.974816,3.55882,3.02632,-7.06011,-3.687,-1.55877,8.43738,-5.14711,-4.64881,4.7167,0.690177,-7.90381,-5.02602,4.17218,2.31967,-0.643423,9.48812,-7.95237,-6.64086,-4.05986,9.08285,-6.24158,-6.37927,-6.6105,-7.2233,-6.21675,-5.70664,-3.29967,9.48575,3.41775,2.68617,-2.24948,8.10997,-2.24931,9.79611,-9.0523,-6.03269,-2.2587,9.36073,5.20965,2.42088,-3.10159,8.1503,-6.67906,-5.73147,4.0687,2.54575,-1.24229,8.30662,-2.09627,8.45056,-7.87801,-6.57832,4.72216,3.03865,-0.929985,9.78172,-8.56307,-7.68598,-7.05257,-5.1684,-7.09076,-7.86729,4.61432,3.1459,-6.34133,-5.8076,-3.82943,10.8457,-8.46082,-5.98507,5.34763,1.4107,-1.68714,10.9111,-1.67886,8.1582,-0.623012,9.18886,-4.21258,8.95874,-2.16744,10.8905,-6.57158,-7.27176,2.14047,4.26411,-8.44217,-7.40916,5.29008,1.87399,4.31824,4.04992,-3.77008,9.93215,-2.72688,10.1131,-6.14278,-7.16144,-3.92457,8.59364,-5.92649,-6.59299,4.68369,1.82617,-6.89905,-7.18329,3.95173,4.22561,-7.66453,-6.23183,-2.44167,7.58954,-6.36603,-7.41281,-6.45081,-6.187,-6.6125,-6.37138,5.46036,2.48044,-2.14756,8.36917,-2.3889,9.52872,3.80752,2.44459,-3.98778,10.158,-6.63887,-4.27843,-8.65266,-5.61819,-7.97003,-5.46918,-5.9604,-7.54825,-0.916011,8.50307,-3.69246,6.97505,-7.98533,-7.09503,-2.30033,7.05462,4.76218,2.51647,-7.04981,-7.33334,3.66401,3.02681,-2.50408,8.7797,7.19996,1.87711,4.01291,3.78562,-0.356015,8.24694,-0.958046,9.12996,4.60675,3.76773,6.21945,1.45031,4.27744,0.8535,-4.72232,-7.48582,6.03923,2.8978,-3.26833,9.16468,-7.97059,-7.29092,-2.3998,9.74005,-2.66721,8.58741,-7.36269,-6.73332,-7.87893,-7.38488,4.65023,0.661333,-4.8171,-7.94764,-4.11564,9.21775,4.80633,2.46562,-2.72887,9.3714,-5.26735,-5.5652,4.9826,2.42992,-6.17018,-7.3156,4.38084,1.77682,5.35084,2.41743,-2.61796,9.416,5.27229,2.94572,-7.52315,-5.95227,-1.45077,7.25555,-3.79916,7.71921,-2.23251,9.84147,3.70054,1.82908,-1.93831,10.1499,-6.18324,-5.9248,-3.33142,9.25797,-6.08536,-8.1344,5.95727,2.17077,4.87366,0.417274,-6.529,-6.39092,-9.24256,-7.88984,-6.36652,-7.13966,-3.90777,9.57726,-7.06252,-5.50523,-2.26423,8.50734,-2.84498,10.6833,5.0391,2.62037,-2.74815,8.10672,3.35945,3.72796,-4.11668,9.19892,5.66903,2.44577,-1.63807,8.68826,-7.42587,-6.48831,6.17063,3.19193,-2.28511,9.02688,-7.10088,-7.15692,4.46293,1.17487,-5.91017,-6.45292,-2.26724,7.10101,-2.43339,8.33712,-4.63309,8.48853,-3.31769,8.51253,-2.49078,10.6907,-1.30798,8.60621,6.30535,2.98754,-5.79384,-6.78213,-1.93213,8.81124,4.55773,3.09047,6.37584,2.17108,4.3927,1.29119,-3.2245,9.69388,-1.69634,9.64392,2.799,0.693593,-2.1426,8.07441,-8.4505,-8.00688,4.736,1.51089,-2.5863,9.35544,-2.94924,9.14503,6.2054,1.90742,5.67172,0.487609,-5.69071,-6.17181,-8.24651,-7.10488,-7.34424,-6.67895,-6.71977,-7.90778,-1.82294,7.40157,-9.40991,-7.16611,-4.37999,8.66277,-1.42615,10.0681,-2.00828,8.03673,-7.50228,-6.6855,-5.65859,-6.29801,-8.02335,-6.77155,-3.40761,9.50621,-2.82447,9.77326,-1.5938,9.34304,-3.5213,7.35943,-3.36961,8.62973,-7.01708,-5.92724,5.20886,3.60157,-1.71817,8.1049,-2.46363,8.36269,-2.77809,7.90776,-2.75459,8.26055,-2.03596,8.94146,-4.53434,9.20074,-7.44387,-6.69556,-6.90099,-7.62732,3.29169,2.71643,6.08686,2.16972,-2.31111,8.86993,-5.75046,7.9899,4.69951,1.32623,4.71851,-0.025031,-6.42374,-4.71511,-8.04974,-8.68209,-3.16103,9.06168,-6.18267,-7.21393,-7.94202,-6.4518,-7.07697,-7.03138,3.93554,0.564708,-1.20372,9.03529,-7.10611,-7.83955,-7.47529,-5.50567,-6.15453,-6.36393,-2.98024,9.24634,-7.75761,-7.70699,-3.08597,9.76968,-8.04954,-9.75237,5.2534,0.950377,5.63789,-0.923086,-5.7065,-6.51047,-8.02132,-7.07377,-8.28594,-6.96322,-7.70722,-6.79397,-2.4962,10.4678,5.02846,4.46617,4.02648,1.6707,-0.319395,8.20599,4.74525,0.639144,-1.0313,8.49602,4.08766,2.6061,3.63826,1.69207,2.55795,3.66963,5.2826,3.30232,-1.04355,8.78851,-6.84762,-7.63353,-4.70868,-7.056,3.53651,-0.179721,-3.38482,7.63149,-5.9265,-6.36702,-0.986074,9.5532,-2.42261,8.85861,-7.42835,-6.78726,-4.02857,8.53005,-8.22675,-7.85172,-5.57529,-8.5426,6.03009,2.53098,-7.10448,-7.53011,-3.4988,8.8885,-2.62485,8.71318,-6.39489,-7.72647,3.93789,1.31027,4.27627,1.91622,-0.923181,7.77647,-5.16017,10.1058,-6.44307,-5.97617,-7.24495,-6.69543,6.27331,0.826824,-6.55655,-7.13246,5.66245,4.41292,-2.13805,8.4103,5.23463,2.82659,-4.86624,-6.74357,-6.14082,-6.26474,-2.67048,9.41834,-1.26311,6.9409,-7.20231,-7.13094,-1.35109,9.80595,3.9906,0.749229,-6.75696,-5.25543,4.84826,-0.0685652,-7.4914,-6.91715,4.46725,2.85683,-2.95571,9.87068,6.32381,1.51429,-6.81177,-6.02734,-2.57188,9.96943,-4.28792,10.5103,3.65025,2.91394,-7.11856,-7.24693,-6.98693,-6.43239,4.7651,1.54376,4.00092,0.65008,-7.14816,-7.7713,-7.58803,-8.39382,4.3321,2.19232,-7.89545,-6.81843,-2.11475,8.5933,-0.743743,9.41927,3.64849,-0.18022,-1.68665,7.79344,4.00214,1.44217,-6.96799,-7.25012,-1.58302,10.9237,-6.68524,-7.23328,4.65831,2.32075,4.62024,2.52566,-4.23412,8.452,-0.822056,9.89593,-7.19868,-7.67614,-3.32742,11.1067,5.27861,0.830165,4.48982,2.09875,-6.58087,-7.6319,-0.880582,7.63418,-7.01088,-6.80326,-7.31601,-6.98972,-6.85883,-7.60811,6.14328,2.85053,-7.49206,-6.51861,-2.28174,10.3214,4.81074,1.78919,-5.58987,-6.20693,4.08096,2.35038,-1.5029,8.43739,4.11536,2.46254,-3.28299,7.76963,4.31953,2.39734,4.91146,0.696421,-1.4782,9.94557,-3.34842,8.70507,-6.97822,-6.86126,4.10012,1.19486,-2.50395,9.06127,4.41891,2.00006,-2.73266,9.72829,3.5436,0.533119,5.78864,0.233456,-6.62589,-6.41242,-2.21942,11.0897,-6.76636,-8.31839,-2.71732,8.52129,-5.20972,-6.48544,3.26056,1.24224,3.45228,2.28299,4.72171,1.87428,-7.52585,-5.1048,5.0695,2.18086,-6.55646,-7.02771,3.23727,3.72275,3.41411,0.508795,-7.80698,-6.64174,-5.90443,-6.37902,-0.387041,10.0468,-1.3506,8.1936,-6.08614,-8.62864,-5.91478,-5.26453,-2.61623,7.97904,4.45459,1.84335,-6.66643,-7.63208,3.6729,1.92546,-1.32976,8.54511,6.31758,1.41958,4.63381,2.81166,-7.01394,-6.0693,-2.7786,9.73183,-2.90131,7.55077,-7.13842,-5.28146,6.71514,1.28398,-6.98408,-7.04893,-3.03946,8.22141,-2.76417,10.5183,-7.35347,-6.89456,4.19345,2.16726,-2.02819,9.23817,4.97076,2.8067,-0.544473,9.04955,4.90727,2.29487,-6.31871,-7.17559,3.71665,0.621485,4.7903,2.33813,-6.47994,-7.53147,-6.80958,-5.71823,-8.07326,-5.96096,4.77342,1.8207,5.71856,1.93466,-2.70156,9.31583,-2.1478,10.5523,4.78855,1.63608,5.53507,2.60834,-7.00058,-6.46058,5.4738,2.43235,-1.34603,9.02452,-7.5337,-8.71074,-7.30893,-7.57253,-5.33752,-4.87402,-7.01364,-6.86542,-7.93331,-7.94791,-5.69392,-6.16116,-7.32291,-7.76491,-6.41965,-7.55783,-7.87996,-7.55785,-6.69005,-5.87906,3.92147,2.86809,-1.5552,9.66568,5.07989,1.47112,-7.48524,-5.0541,-1.82724,8.70402,-2.00421,9.88004,-2.62153,8.79332,-7.52111,-6.44819,4.06424,2.09518,-6.65494,-5.94752,6.93878,1.61033,-3.95728,7.60682,5.67016,2.21196,-7.81507,-5.79413,-2.41152,8.24128,-3.83738,9.21115,4.5516,4.55288,-5.75551,-5.93258,4.56545,2.59384,-7.45614,-9.47115,-2.39568,9.67642,5.57816,1.45712,-7.48184,-6.41134,-1.99415,12.867,-8.35854,-6.69675,-7.52559,-7.6793,5.7454,3.1602,2.94692,1.87483,-8.77324,-6.66682,-3.21125,8.68662,-6.25806,-7.24972,5.17639,1.0747,-2.44897,11.4775,-3.30172,8.89955,-2.85191,8.21201,-8.85893,-6.1322,4.08957,1.30155,-5.88132,-7.31173,-7.10309,-7.22943,-2.46068,8.18334,-7.01226,-7.85464,4.75411,2.12347,-3.42862,10.5642,7.16681,1.4423,5.42568,2.39863,-6.00833,-8.22609,-1.7619,9.62466,-2.49527,8.99016,-2.98837,8.82863,-2.97262,8.54856,-1.34142,9.26871,-5.99652,-6.95795,-1.87061,7.35277,-8.68277,-8.46425,-7.01808,-8.10441,-7.04269,-7.62501,-7.69783,-6.88348,-2.19829,10.4896,4.67396,1.2032,-5.58263,-6.90298,-5.69224,-4.29055,4.77285,1.27305,-3.33469,8.6929,-2.54195,8.47086,4.46492,1.21742,5.41158,-0.875373,-8.68069,-7.42278,-3.88687,8.07646,4.6682,2.00293,-8.29799,-8.64092,-1.86382,10.3829,-6.51234,-5.04193,4.54458,2.25219,-1.93264,9.32554,-3.06285,7.81641,-6.90714,-5.10786,4.69653,2.50286,6.43757,2.61401,-1.85483,8.9587,4.60224,3.07647,4.4492,2.1906,5.02181,2.40321,-2.22923,7.8888,5.68943,1.43793,-6.71097,-6.43817,-5.00633,-5.80006,-2.43763,8.53663,5.72577,2.44787,-6.57079,-5.17789,-5.77867,-4.92176,-6.57222,-6.06437,3.96639,2.25216,-7.95177,-9.80146,4.92574,2.30763,-7.6221,-8.20013,-6.4132,-6.91575,4.01432,2.36897,3.0833,1.54505,-1.99416,9.52807,-7.85128,-8.25973,-0.86423,8.76525,-6.31412,-8.64087,-8.07355,-6.73717,-2.52821,8.01176,-5.82357,-6.65687,-7.08865,-7.73063,-5.56251,-6.99818,-2.12513,8.98159,-6.89834,-7.26863,-7.92654,-6.34346,4.86201,1.49442,4.92905,4.42847,-5.57789,-5.3186,4.34232,3.34888,2.64614,2.34723,-4.10363,8.41491,-2.18648,8.18706,-3.39871,8.19848,-2.66098,9.6026,-6.95927,-6.42774,-5.61392,-7.74628,5.60376,4.18369,5.28536,4.13642,4.8428,0.457426,-6.33816,-6.12095,-2.4394,8.62897,4.56938,2.45967,4.0582,0.958413,5.62164,1.64834,5.73119,2.58231,4.66806,1.96405,-6.71905,-6.87706,-2.18503,8.88414,-6.03901,-6.33338,-8.38435,-6.12005,0.0641622,9.0735,5.19967,3.05395,-5.48716,-7.13016,-6.85541,-5.46789,-1.88353,8.15713,4.27891,3.1325,-2.75816,9.98586,-2.03022,9.34795,-7.66741,-7.50096,-3.39305,9.16801,-8.49476,-5.71537,-1.68378,9.8278,-7.41559,-6.07205,-3.15577,7.93274,5.22381,1.61388,3.65739,1.74854,4.94251,1.21889,-7.12832,-5.27276,-9.58286,-6.20223,-2.21613,8.29993,5.34799,2.92987,4.09496,2.37231,-7.25183,-5.79136,-6.46981,-7.12137,-6.28607,-9.8205,4.52865,1.06926,-3.10984,8.72259,3.61865,2.68153,-5.96604,-7.68329,3.11435,1.28126,-1.1064,7.61243,-2.17688,8.2658,-3.27246,7.2094,-5.55143,-6.32388,-1.69667,10.3705,-2.16558,7.25125,-6.36572,-6.70053,4.12259,3.38252,-4.80554,-7.79949,-5.23966,-6.13798,4.21969,1.69139,-1.98985,10.547,-2.52269,7.95658,-6.75642,-6.32862,-3.51521,7.8001,4.70435,-0.00229688,6.25359,2.4267,5.82935,0.745562,5.24778,2.15978,5.48052,1.32055,-3.05358,9.12521,-3.18922,9.24654,4.47276,2.11988,5.36751,2.02512,-2.18511,8.6292,-2.48469,9.51228,5.57556,3.24472,-2.58121,10.0178,-6.12629,-6.49895,-4.54732,8.0062,-4.20166,10.5438,-7.61422,-7.69036,-4.42797,8.98777,4.45301,1.53344,4.59296,2.45021,-6.81264,-6.36417,4.62346,3.16156,-5.93007,-8.36501,-2.78425,6.71237,-6.17141,-6.64689,-5.20608,8.95999,-7.30598,-5.73166,4.39572,2.93726,-1.89503,9.77179,-5.683,-7.48989,4.80924,0.559455,-2.17793,9.98983,5.23728,2.67434,-7.03976,-6.20877,3.90435,3.20926,-7.78536,-7.53388,-1.00684,9.08838,-5.26741,-5.98327,3.28002,2.71942,-1.47166,8.50427,-2.32733,9.26251,5.16271,1.39947,-6.59093,-6.61979,-2.44492,7.93654,-1.05805,9.97356,-3.1109,10.8666,3.38834,3.41693,4.83098,2.01961,-2.74013,9.71049,-3.34892,8.41489,4.94768,0.263001,3.57477,1.66795,5.78915,1.26999,-4.81812,-5.67174,-1.88508,9.64263,3.69048,4.60555,4.03037,1.7862,-7.4418,-7.08933}, + {0.127717,0.211407,0.195547,0.21633,0.39671,0.229008,0.20839,0.169236,0.314314,0.322473,0.169506,0.45499,0.147819,0.296502,0.15198,0.356444,0.0992833,0.220833,0.296206,0.178067,0.135359,0.189725,0.243099,0.519986,0.168105,0.273465,0.126033,0.18045,0.282832,0.193901,0.213704,0.425046,0.203191,0.228674,0.209267,0.355039,0.212918,0.315495,0.294112,0.257576,0.5786,0.186019,0.171919,0.171919,0.449151,1.34947,0.171919,0.16341,0.641387,0.342115,0.267343,0.246125,0.277612,0.181462,0.22944,1.95598,0.164897,0.235803,0.228273,0.314629,0.127403,0.241241,0.189362,0.151691,0.130085,0.526707,0.217069,0.282306,0.531523,0.177035,0.169776,0.20395,0.177165,0.146628,0.280013,0.223033,0.50947,0.184133,0.295329,0.183219,0.28166,0.179348,0.276462,1.00283,0.248147,0.214453,0.231732,0.170672,0.256893,0.133271,0.151137,0.500823,0.23678,0.376983,0.362061,0.140013,0.388863,0.398552,0.38015,0.190081,0.167115,0.206884,0.473849,1.05117,0.435665,0.323618,0.326201,0.32226,0.201787,0.246496,0.28325,0.226596,0.238153,0.277268,0.674629,0.179433,0.175651,0.154778,0.178195,0.192796,0.103571,0.227621,0.201124,0.160525,0.160964,0.240099,0.258027,0.134127,0.127717,0.341378,0.311595,0.282306,0.168988,0.40775,0.246125,0.583131,0.236804,0.238633,0.194824,0.169315,0.244227,0.249511,0.189725,0.305662,0.301415,0.658641,0.250944,0.151792,0.141383,0.143843,0.563347,0.184216,0.204155,0.221764,0.314908,0.144518,0.228808,0.255785,0.163457,0.424705,0.170202,0.312598,0.300629,0.532614,0.661392,0.228273,0.543432,0.257175,0.258994,0.281413,0.273897,0.246837,0.293489,0.25533,0.260492,0.213704,0.3091,0.17103,0.172285,0.241399,0.35999,0.372243,0.269191,0.390239,0.31761,0.200593,0.22197,0.752914,0.266571,0.13102,0.268659,0.293723,0.356294,0.296258,0.264531,0.15468,0.358535,0.243711,0.112147,0.121659,0.197101,0.515292,0.245628,0.279863,0.789807,0.195156,0.196073,0.149564,0.118675,0.389373,0.233821,0.176128,0.481088,0.360027,0.553152,0.208207,0.171608,0.160489,0.334298,0.139426,0.168603,0.266199,0.326458,0.103571,0.171208,0.130961,0.190887,0.177229,0.241651,0.115152,0.196753,0.481088,0.230965,0.354631,0.14591,0.328543,0.141544,0.195888,0.290379,0.245954,0.184547,0.575214,0.186929,0.28527,0.292213,1.20033,0.281528,0.15625,0.211524,0.186398,0.298061,0.147393,0.245349,0.164527,0.224771,0.222382,0.251643,0.148835,0.135359,0.204967,0.193024,0.486309,0.389686,0.211921,0.307405,0.38666,0.26802,0.16605,0.323134,0.268397,0.217894,0.974118,0.371618,0.156201,0.305787,0.339305,0.371032,0.381765,0.22747,0.24906,0.100884,0.253192,0.314253,0.388289,0.580947,1.00267,0.241998,0.489101,0.341501,0.247423,0.328311,0.440281,0.14927,0.244469,0.846828,0.191725,0.217429,0.123403,0.322875,0.145373,0.757259,0.190086,0.316286,0.268397,0.296721,0.440472,0.186848,0.232134,0.180239,0.219724,0.205886,0.250975,0.145636,0.312476,0.366418,0.128135,0.315235,0.264531,0.161815,0.31631,0.296489,0.37171,0.197217,0.195625,0.479579,0.443037,0.323347,0.193616,0.160251,0.8952,0.256291,0.593345,0.177165,0.409514,0.847863,0.111448,0.210031,0.251347,0.351953,0.705204,0.117901,0.182343,0.230179,0.83632,0.22104,0.145163,0.200326,0.23431,0.21868,0.253575,0.186562,0.192757,0.172716,0.27396,0.258581,0.327892,0.376138,0.223477,0.302375,0.145845,0.436902,0.421794,0.328543,0.19246,0.238889,0.254866,0.284674,0.457849,0.202937,0.392568,0.453083,0.782713,0.465401,0.178623,0.304863,0.190081,0.228641,0.255135,0.245037,0.217526,0.109584,0.276462,0.182301,0.38582,0.349942,1.3889,0.30235,0.796353,0.160168,0.643204,0.153752,0.410268,0.186439,0.256834,0.185783,0.0957629,0.226596,0.197951,0.17123,0.192836,0.18405,0.575784,0.228874,0.201787,0.241209,0.217386,0.195751,0.291585,0.144531,0.14176,0.157635,0.410268,0.476338,0.308148,0.148077,0.152093,0.196791,0.568087,0.414026,0.250587,0.473463,0.293645,0.396768,0.2766,0.38664,0.135034,1.50827,0.472527,0.268418,0.40383,0.375914,0.246496,0.176474,0.340405,0.220833,0.138782,0.159009,0.444219,0.259582,0.33638,0.195586,0.210974,0.200288,0.148129,0.0974216,0.211588,0.280081,0.44113,0.773921,0.553848,0.448079,0.183136,0.380854,0.685021,0.308767,0.553276,0.181578,0.164759,0.313889,0.137886,0.545387,0.278449,0.736895,0.360054,0.358929,0.457315,0.343278,0.507662,0.280829,0.113886,0.23146,0.160584,0.192796,0.147561,0.241272,0.168988,0.730511,0.27836,0.179847,0.22555,0.418069,0.158348,0.128965,0.179454,0.126366,0.164434,0.273633,0.309556,0.500823,0.367852,0.192875,0.230262,0.32724,0.249969,0.142618,0.494229,0.36108,0.227931,0.23113,0.742825,0.190126,0.33741,0.280598,0.145268,0.378423,0.211921,0.183594,0.59201,0.279563,0.195683,0.248101,0.199754,0.342494,0.174343,0.14149,0.28085,0.175781,0.518738,0.17223,0.489904,0.181167,0.354286,0.297824,0.280829,0.219412,0.22814,0.195625,0.313949,0.294708,0.211551,0.236255,0.666933,0.204808,0.52591,0.180725,0.186889,0.246589,0.410575,0.338348,0.206219,0.361766,0.158143,0.280816,0.4149,0.773082,0.340046,0.369672,0.256923,0.167195,0.197217,0.252339,0.172716,0.191526,0.263085,0.345698,0.168286,0.243099,0.434631,0.22944,0.161862,0.206589,0.23457,0.181924,0.419063,0.183427,0.186152,0.236352,0.306336,0.149002,1.50086,0.188231,0.442757,0.485602,0.466662,0.17329,0.141329,0.180619,0.160061,0.192569,0.270999,0.117901,0.362693,0.217561,0.208975,0.233658,0.175173,1.10307,0.14625,1.31124,0.237608,0.286784,0.325112,0.2485,0.259641,0.553152,0.179039,0.780781,0.174758,0.297824,0.2558,0.235949,0.952186,0.356744,0.312646,0.189362,0.574524,0.705204,0.213168,0.225956,0.424165,0.169506,0.137109,0.352451,0.454554,0.653302,0.31261,0.194412,0.23719,0.137886,0.31498,0.199085,0.203875,0.597248,1.10036,0.196869,0.22104,0.451345,0.105613,0.683928,0.135204,0.25533,0.607871,0.219724,0.184464,0.725001,0.160061,0.333407,0.192569,0.234147,0.47178,0.161815,0.242455,0.215305,0.410575,0.242376,0.211335,0.462804,0.275065,0.126878,0.170404,0.179433,0.147244,0.109584,0.352905,0.158215,0.197604,0.172407,0.407506,0.645446,0.313061,0.165602,0.136663,0.55444,0.15527,0.133128,0.125912,0.340405,0.44521,0.122783,0.814526,0.243773,0.15743,0.266743,0.684458,0.22221,0.181294,0.193901,0.258802,0.167195,0.292056,0.132309,0.227671,0.117334,0.271758,0.146185,0.225042,0.225964,0.194863,0.290274,0.138438,0.196714,0.266012,0.267771,0.162544,0.244258,0.358038,0.522617,0.192875,0.45066,0.330396,0.223477,0.42967,0.350884,0.404655,0.123155,0.431583,0.191675,0.147354,0.609034,0.459487,0.187337,0.215128,0.604169,0.330165,0.494229,0.40775,0.167377,0.192648,0.234635,0.275578,0.253094,0.420063,0.228299,0.206478,0.20395,0.377656,0.317393,0.478623,0.159009,0.217034,0.300933,0.139754,0.153901,0.261077,0.22834,0.449609,0.157672,0.176474,0.285704,0.180186,0.212738,0.266428,0.388313,0.0954637,0.298093,0.251643,0.330696,0.159572,0.210666,0.149411,0.139618,0.338472,0.450304,0.208793,0.583609,0.185865,0.400576,0.21626,0.174867,0.239144,0.249113,0.200402,0.275065,0.238793,0.205784,0.4475,0.231262,0.259082,0.20934,0.16806,0.193616,0.213811,0.395632,0.482465,0.274649,0.307405,0.165866,0.334275,0.683337,0.368825,0.14625,0.780742,0.163457,0.226596,0.138713,1.79155,0.400443,0.233658,0.426399,0.623024,0.670955,0.123588,0.110899,0.173751,0.651068,0.199983,0.190887,0.541435,0.21324,0.266571,0.134638,0.179348,0.145636,0.170929,0.623252,0.587738,0.109688,0.515314,0.217666,0.213311,0.249144,0.187947,0.270999,0.268311,0.469782,0.763609,0.32124,0.146315,0.265223,0.298694,0.197623,0.21349,0.845778,0.175466,0.123588,0.17223,0.258603,1.17119,0.538142,0.407675,0.120288,0.587238,0.244664,0.333956,0.132812,0.21399,0.302375,0.275882,0.134284,0.377555,0.228541,0.187307,0.143804,0.180545,0.222451,0.239638,0.188028,0.46334,0.175868,0.242392,0.314762,0.44473,0.21962,0.175966,1.12364,0.138837,0.400576,0.18184,0.137706,0.409763,0.216894,0.466662,0.376604,0.487155,0.283143,0.118547,0.221591,0.122783,0.179007,0.16628,0.180999,0.239845,0.169607,0.578402,0.396537,0.222288,0.563237,0.371238,0.138658,0.324336,0.191526,0.168603,0.357715,0.640905,0.460706,0.220902,0.240797,0.164062,0.157853,0.34457,0.196092,0.289353,0.104597,0.259641,0.126878,0.175781,0.441458,0.820108,0.261864,0.23431,0.254506,0.271955,0.227529,0.22834,0.196753,0.224906,0.193783,0.419481,0.236933,0.229706,0.29785,0.222947,0.177606,0.216911,0.305188,0.933438,0.116666,0.278483,0.0973824,0.271224,0.127717,1.28139,0.276283,0.180704,0.234554,0.285984,0.290172,0.49594,0.135879,0.436784,0.206219,0.342215,0.374165,0.182217,0.274864,0.625,0.356925,0.194324,0.342215,0.113012,0.155123,0.254207,0.438919,0.262548,0.302299,0.179528,0.312744,0.168513,0.142618,0.150543,0.231361,0.166004,0.186725,0.38848,0.179857,0.182301,0.629476,0.44113,0.289669,0.328543,0.279938,0.14625,0.187174,0.157635,0.396749,0.798931,0.201541,0.778619,0.265883,0.258027,0.218576,0.266571,0.160168,0.230303,0.273633,0.233298,0.30175,0.217069,0.345145,0.397901,0.224499,0.248101,0.241335,0.222947,0.237094,0.176518,0.380032,0.634775,0.426193,0.16362,0.231097,0.219898,0.343789,0.275578,0.282022,0.628542,0.232184,0.848367,0.200754,0.179177}, + {0,0,2,3,3,0,2,2,2,2,3,0,3,2,2,2,3,3,3,3,2,0,0,0,2,3,3,3,2,2,0,0,2,3,3,0,0,2,0,0,3,2,3,0,3,0,3,3,0,2,0,3,2,0,3,0,3,3,3,2,2,3,0,0,3,3,0,2,2,3,0,3,2,2,2,0,2,3,3,3,2,3,3,3,2,0,2,0,3,3,3,3,2,2,0,2,0,3,2,2,2,0,0,3,0,2,2,3,2,3,0,2,2,2,3,2,0,0,2,3,3,2,0,2,0,0,2,0,2,2,3,2,2,0,3,0,3,2,2,2,3,3,0,0,0,3,2,3,3,3,3,0,2,0,3,2,3,2,3,0,2,3,3,2,3,3,2,2,0,0,2,3,3,2,3,0,2,0,2,0,3,2,3,2,3,0,3,0,3,0,2,3,2,2,3,0,2,2,2,0,3,2,3,3,2,3,2,3,3,2,2,0,0,2,2,3,0,3,0,2,0,0,2,3,0,3,3,2,0,3,3,0,3,0,2,2,0,2,0,2,0,0,0,2,0,3,2,3,2,3,2,2,0,2,3,2,3,2,2,2,2,3,0,2,0,0,2,3,3,0,2,3,2,2,3,0,3,0,0,2,0,2,0,2,2,3,3,2,3,0,0,3,2,2,0,3,2,0,0,3,0,0,2,0,3,2,0,2,0,0,0,0,0,2,0,0,2,3,0,0,2,0,0,2,0,2,3,2,3,3,2,2,0,0,0,3,0,2,0,2,0,2,2,2,3,3,0,0,3,3,3,3,3,2,3,3,2,3,3,0,2,2,2,2,0,2,0,0,0,2,2,3,3,2,3,2,3,0,2,3,0,2,0,2,2,0,3,0,2,0,2,3,0,3,0,0,0,3,2,3,3,0,3,2,3,0,2,3,3,0,2,3,0,0,0,2,0,3,0,2,3,3,3,3,3,0,2,0,2,2,3,3,0,3,0,2,0,2,0,3,0,0,0,2,3,3,2,3,0,0,0,0,3,3,0,3,2,0,2,3,2,2,3,3,2,2,2,0,2,3,0,3,3,0,0,2,0,3,2,3,0,2,0,2,2,3,2,0,3,3,3,2,3,0,3,0,2,2,0,0,0,3,0,3,3,2,3,2,3,2,3,0,2,3,0,2,0,3,3,3,3,3,3,2,0,3,2,2,2,3,3,2,3,0,2,3,3,2,2,0,0,0,0,3,0,3,3,3,0,0,0,3,3,3,3,3,0,2,3,3,3,3,3,3,0,0,2,2,3,3,2,2,0,0,3,0,0,0,2,3,0,0,0,3,0,3,0,2,2,0,0,0,0,3,2,2,3,2,3,2,2,2,2,3,0,0,2,3,0,3,3,0,3,0,0,2,0,3,3,0,2,2,3,3,0,0,2,0,2,3,2,0,0,3,3,0,3,2,0,2,0,2,3,2,0,3,3,2,0,0,2,2,0,0,2,0,3,3,2,3,2,0,3,0,2,2,3,3,0,3,2,2,0,3,0,0,0,2,0,3,2,0,2,3,2,3,2,2,3,3,0,2,3,2,3,2,2,0,3,0,3,0,2,2,2,0,2,0,2,2,0,0,3,3,0,0,3,2,0,2,3,2,2,0,3,3,0,2,0,3,3,0,2,3,2,3,2,0,2,2,0,0,0,2,2,3,3,2,2,0,2,3,0,0,0,0,0,0,0,0,0,0,2,3,2,0,3,3,3,0,2,0,2,3,2,0,3,3,2,0,2,0,3,2,0,3,0,0,2,2,0,3,0,2,3,3,3,0,2,0,0,3,0,2,3,2,2,0,3,3,3,3,3,0,3,0,0,0,0,3,2,0,0,2,3,3,2,2,0,3,2,0,3,0,2,3,3,0,2,2,3,2,2,2,3,2,0,0,3,2,0,0,0,2,0,2,0,0,2,2,3,0,3,0,0,3,0,0,0,3,0,0,2,2,0,2,2,3,3,3,3,0,0,2,2,2,0,3,2,2,2,2,2,0,3,0,0,3,2,0,0,3,2,3,3,0,3,0,3,0,3,2,2,2,0,0,3,2,2,0,0,0,2,3,2,0,2,3,3,3,0,3,3,0,2,0,0,2,3,3,0,3,2,2,2,2,2,3,3,2,2,3,3,2,3,0,3,3,0,3,2,2,0,2,0,3,0,3,0,2,3,0,2,3,2,0,2,0,3,0,2,3,3,2,0,3,3,3,2,2,3,3,2,2,2,0,3,2,2,0}, + {271,271,329,343,387,426,426,601}, + {426,601,426,387,343,271,329,271}, + {3.70991,4.43491,3.76334,9.43944,9.43944,3.70991,3.76334,4.43491}} +}; + +typedef ConnectComponentsEdgesTest ConnectComponentsEdgesTestF_Int; +TEST_P(ConnectComponentsEdgesTestF_Int, Result) +{ + EXPECT_TRUE(true); +} + +INSTANTIATE_TEST_CASE_P(ConnectComponentsEdgesTest, + ConnectComponentsEdgesTestF_Int, + ::testing::ValuesIn(mr_fix_conn_inputsf2)); + }; // namespace sparse }; // end namespace raft From 1e226364abd350bee7a991c77c8ee13e9de2a43f Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 5 Jun 2023 08:39:49 -0700 Subject: [PATCH 28/53] style changes --- cpp/include/raft/matrix/detail/gather.cuh | 30 +- cpp/include/raft/matrix/detail/scatter.cuh | 58 +- cpp/include/raft/matrix/gather.cuh | 11 +- cpp/include/raft/matrix/scatter.cuh | 14 +- .../neighbors/detail/connect_components.cuh | 34 +- cpp/test/matrix/gather.cu | 53 +- cpp/test/matrix/scatter.cu | 54 +- .../sparse/neighbors/connect_components.cu | 523 ++++++++++++++++-- 8 files changed, 605 insertions(+), 172 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 84a6b493d2..71def5db58 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -16,9 +16,9 @@ #pragma once -#include #include #include +#include #include #include #include @@ -354,16 +354,16 @@ template inout, raft::device_vector_view map, - MapTransformOp transform_op, - IndexT batch_size) + MapTransformOp transform_op, + IndexT batch_size) { // return type of MapTransformOp, must be convertible to IndexT typedef typename std::result_of::type MapTransformOpReturnT; RAFT_EXPECTS((std::is_convertible::value), - "MapTransformOp's result type must be convertible to signed integer"); + "MapTransformOp's result type must be convertible to signed integer"); - IndexT m = inout.extent(0); - IndexT n = inout.extent(1); + IndexT m = inout.extent(0); + IndexT n = inout.extent(1); IndexT map_length = map.extent(0); // skip in case of 0 length input @@ -385,15 +385,15 @@ void gatherInplaceImpl(raft::resources const& handle, raft::make_device_vector(handle, map_length * cols_per_batch); auto gather_op = [inout = inout.data_handle(), - map = map.data_handle(), - transform_op, - batch_offset, - map_length, - cols_per_batch = raft::util::FastIntDiv(cols_per_batch), - n] __device__(auto idx) { - IndexT row = idx / cols_per_batch; - IndexT col = idx % cols_per_batch; - MapT map_val = map[row]; + map = map.data_handle(), + transform_op, + batch_offset, + map_length, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + n] __device__(auto idx) { + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; + MapT map_val = map[row]; IndexT i_src = transform_op(map_val); return inout[i_src * n + batch_offset + col]; diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter.cuh index b0f5bc98cf..2b29587c58 100644 --- a/cpp/include/raft/matrix/detail/scatter.cuh +++ b/cpp/include/raft/matrix/detail/scatter.cuh @@ -51,25 +51,22 @@ namespace detail { * @param[inout] batch_size column batch size */ - - - - template -void scatterInplaceImpl(raft::resources const& handle, - raft::device_matrix_view inout, - raft::device_vector_view map, - IndexT batch_size) +void scatterInplaceImpl( + raft::resources const& handle, + raft::device_matrix_view inout, + raft::device_vector_view map, + IndexT batch_size) { - - IndexT m = inout.extent(0); - IndexT n = inout.extent(1); + IndexT m = inout.extent(0); + IndexT n = inout.extent(1); IndexT map_length = map.extent(0); // skip in case of 0 length input if (map_length <= 0 || m <= 0 || n <= 0 || batch_size < 0) return; - RAFT_EXPECTS(map_length == m, "Length of map should be equal to number of rows for inplace scatter"); + RAFT_EXPECTS(map_length == m, + "Length of map should be equal to number of rows for inplace scatter"); // re-assign batch_size for default case if (batch_size == 0) batch_size = n; @@ -80,32 +77,34 @@ void scatterInplaceImpl(raft::resources const& handle, IndexT n_batches = raft::ceildiv(n, batch_size); - auto scratch_space = - raft::make_device_vector(handle, m * batch_size); + auto scratch_space = raft::make_device_vector(handle, m * batch_size); for (IndexT bid = 0; bid < n_batches; bid++) { IndexT batch_offset = bid * batch_size; IndexT cols_per_batch = min(batch_size, n - batch_offset); auto copy_op = [inout = inout.data_handle(), - map = map.data_handle(), - batch_offset, - cols_per_batch = raft::util::FastIntDiv(cols_per_batch), - n] __device__(auto idx) { + map = map.data_handle(), + batch_offset, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + n] __device__(auto idx) { IndexT row = idx / cols_per_batch; IndexT col = idx % cols_per_batch; return inout[row * n + batch_offset + col]; }; - raft::linalg::map_offset(handle, raft::make_device_vector_view(scratch_space.data_handle(), m * cols_per_batch), copy_op); + raft::linalg::map_offset( + handle, + raft::make_device_vector_view(scratch_space.data_handle(), m * cols_per_batch), + copy_op); auto scatter_op = [inout = inout.data_handle(), - map = map.data_handle(), - scratch_space = scratch_space.data_handle(), - batch_offset, - cols_per_batch = raft::util::FastIntDiv(cols_per_batch), - n] __device__(auto idx) { - IndexT row = idx / cols_per_batch; - IndexT col = idx % cols_per_batch; + map = map.data_handle(), + scratch_space = scratch_space.data_handle(), + batch_offset, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + n] __device__(auto idx) { + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; IndexT map_val = map[row]; inout[map_val * n + batch_offset + col] = scratch_space[idx]; @@ -118,14 +117,13 @@ void scatterInplaceImpl(raft::resources const& handle, template void scatter(raft::resources const& handle, - raft::device_matrix_view inout, - raft::device_vector_view map, - IndexT batch_size) + raft::device_matrix_view inout, + raft::device_vector_view map, + IndexT batch_size) { scatterInplaceImpl(handle, inout, map, batch_size); } - } // end namespace detail } // end namespace matrix } // end namespace raft \ No newline at end of file diff --git a/cpp/include/raft/matrix/gather.cuh b/cpp/include/raft/matrix/gather.cuh index f00244b752..b07694445f 100644 --- a/cpp/include/raft/matrix/gather.cuh +++ b/cpp/include/raft/matrix/gather.cuh @@ -311,14 +311,19 @@ void gather_if(const raft::resources& handle, * @param[in] handle raft handle * @param[inout] inout input matrix (n_rows * n_cols) * @param[in] map Pointer to the input sequence of gather locations -* @param[in] col_batch_size (optional) column batch size. Determines the shape of the scratch space (map_length, col_batch_size). When set to zero (default), no batching is done and an additional scratch space of shape (map_lengthm, n_cols) is created. + * @param[in] col_batch_size (optional) column batch size. Determines the shape of the scratch space + * (map_length, col_batch_size). When set to zero (default), no batching is done and an additional + * scratch space of shape (map_lengthm, n_cols) is created. * @param[in] transform_op (optional) Transformation to apply to map values */ -template +template void gather(raft::resources const& handle, raft::device_matrix_view inout, raft::device_vector_view map, - idx_t col_batch_size = 0, + idx_t col_batch_size = 0, map_xform_t transform_op = raft::identity_op()) { detail::gather(handle, inout, map, transform_op, col_batch_size); diff --git a/cpp/include/raft/matrix/scatter.cuh b/cpp/include/raft/matrix/scatter.cuh index 5e6850d214..849a5c7409 100644 --- a/cpp/include/raft/matrix/scatter.cuh +++ b/cpp/include/raft/matrix/scatter.cuh @@ -39,14 +39,18 @@ namespace raft::matrix { * * @param[in] handle raft handle * @param[inout] inout input matrix (n_rows * n_cols) - * @param[in] map Pointer to the input sequence of scatter locations. The length of the map should be equal to the number of rows in the input matrix. Map indices should be unique and in the range [0, n_rows). The map represents a complete permutation of indices. - * @param[in] col_batch_size (optional) column batch size. Determines the shape of the scratch space (n_rows, col_batch_size). When set to zero (default), no batching is done and an additional scratch space of shape (n_rows, n_cols) is created. + * @param[in] map Pointer to the input sequence of scatter locations. The length of the map should + * be equal to the number of rows in the input matrix. Map indices should be unique and in the range + * [0, n_rows). The map represents a complete permutation of indices. + * @param[in] col_batch_size (optional) column batch size. Determines the shape of the scratch space + * (n_rows, col_batch_size). When set to zero (default), no batching is done and an additional + * scratch space of shape (n_rows, n_cols) is created. */ template void scatter(raft::resources const& handle, - raft::device_matrix_view inout, - raft::device_vector_view map, - idx_t col_batch_size = 0) + raft::device_matrix_view inout, + raft::device_vector_view map, + idx_t col_batch_size = 0) { detail::scatter(handle, inout, map, col_batch_size); } diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index afcc981d3e..8d9e1eef58 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -15,7 +15,6 @@ */ #pragma once -#include "raft/core/logger-macros.hpp" #include #include #include @@ -23,8 +22,8 @@ #include #include -#include #include +#include #include #include #include @@ -112,7 +111,8 @@ struct FixConnectivitiesRedOp { void gather(const raft::resources& handle, value_idx* map) { auto tmp_colors = raft::make_device_vector(handle, m); - thrust::gather(raft::resource::get_thrust_policy(handle), map, map + m, colors, tmp_colors.data_handle()); + thrust::gather( + raft::resource::get_thrust_policy(handle), map, map + m, colors, tmp_colors.data_handle()); raft::copy_async(colors, tmp_colors.data_handle(), m, raft::resource::get_cuda_stream(handle)); } @@ -123,7 +123,6 @@ struct FixConnectivitiesRedOp { raft::resource::get_thrust_policy(handle), colors, colors + m, map, tmp_colors.data_handle()); raft::copy_async(colors, tmp_colors.data_handle(), m, raft::resource::get_cuda_stream(handle)); } - }; /** @@ -262,14 +261,14 @@ void perform_1nn(raft::resources const& handle, auto colors_group_idxs = raft::make_device_vector(handle, n_components + 1); raft::sparse::convert::sorted_coo_to_csr( colors, n_rows, colors_group_idxs.data_handle(), n_components + 1, stream); - - auto group_idxs_view = - raft::make_device_vector_view(colors_group_idxs.data_handle() + 1, n_components); + + auto group_idxs_view = raft::make_device_vector_view( + colors_group_idxs.data_handle() + 1, n_components); auto x_norm = raft::make_device_vector(handle, (value_idx)n_rows); raft::linalg::rowNorm( x_norm.data_handle(), X, n_cols, n_rows, raft::linalg::L2Norm, true, stream); - + auto adj = raft::make_device_matrix(handle, row_batch_size, n_components); using OutT = raft::KeyValuePair; using ParamT = raft::distance::masked_l2_nn_params; @@ -282,12 +281,9 @@ void perform_1nn(raft::resources const& handle, size_t n_batches = raft::ceildiv(n_rows, row_batch_size); - RAFT_LOG_INFO("row_batch_size %zu, col_batch_size %zu, n_batches %zu", row_batch_size, col_batch_size, n_batches); - for (size_t bid = 0; bid < n_batches; bid++) { size_t batch_offset = bid * row_batch_size; size_t rows_per_batch = min(row_batch_size, n_rows - batch_offset); - RAFT_LOG_INFO("rows_per_batch %zu, batch_offset %zu", rows_per_batch, batch_offset); auto X_batch_view = raft::make_device_matrix_view( X + batch_offset * n_cols, rows_per_batch, n_cols); @@ -302,7 +298,7 @@ void perform_1nn(raft::resources const& handle, value_idx col = idx % n_components; return colors[batch_offset + row] != col; }; - + auto adj_vector_view = raft::make_device_vector_view( adj.data_handle(), rows_per_batch * n_components); @@ -324,7 +320,6 @@ void perform_1nn(raft::resources const& handle, adj_view, group_idxs_view, kvp_view); - } thrust::transform(exec_policy, @@ -341,17 +336,19 @@ void perform_1nn(raft::resources const& handle, raft::matrix::scatter(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); auto tmp_colors = raft::make_device_vector(handle, n_rows); - auto tmp_kvp = raft::make_device_vector (handle, n_rows); + auto tmp_kvp = raft::make_device_vector(handle, n_rows); thrust::scatter(exec_policy, kvp, kvp + n_rows, sort_plan.data_handle(), tmp_kvp.data_handle()); - thrust::scatter(exec_policy, colors, colors + n_rows, sort_plan.data_handle(), tmp_colors.data_handle()); + thrust::scatter( + exec_policy, colors, colors + n_rows, sort_plan.data_handle(), tmp_colors.data_handle()); reduction_op.scatter(handle, sort_plan.data_handle()); raft::copy_async(colors, tmp_colors.data_handle(), n_rows, stream); raft::copy_async(kvp, tmp_kvp.data_handle(), n_rows, stream); auto keys = raft::make_device_vector(handle, n_rows); - raft::linalg::map_offset(handle, keys.view(), [kvp]__device__(auto idx) { return kvp[idx].key; }); + raft::linalg::map_offset( + handle, keys.view(), [kvp] __device__(auto idx) { return kvp[idx].key; }); LookupColorOp extract_colors_op(colors); thrust::transform(exec_policy, kvp, kvp + n_rows, nn_colors, extract_colors_op); @@ -492,7 +489,7 @@ void connect_components(raft::resources const& handle, bool zero_based = true; raft::label::make_monotonic( colors.data(), const_cast(orig_colors), n_rows, stream, zero_based); - + /** * First compute 1-nn for all colors where the color of each data point * is guaranteed to be != color of its nearest neighbor. @@ -545,7 +542,7 @@ void connect_components(raft::resources const& handle, min_components_by_color( min_edges, out_index.data(), src_indices.data(), temp_inds_dists.data(), n_rows, stream); - + /** * Symmetrize resulting edge list */ @@ -553,5 +550,4 @@ void connect_components(raft::resources const& handle, handle, min_edges.rows(), min_edges.cols(), min_edges.vals(), n_rows, n_rows, size, out); } - }; // end namespace raft::sparse::neighbors::detail diff --git a/cpp/test/matrix/gather.cu b/cpp/test/matrix/gather.cu index ccbfe0adab..dcc4e81c6d 100644 --- a/cpp/test/matrix/gather.cu +++ b/cpp/test/matrix/gather.cu @@ -78,7 +78,12 @@ struct GatherInputs { unsigned long long int seed; }; -template +template class GatherTest : public ::testing::TestWithParam> { protected: GatherTest() @@ -93,7 +98,7 @@ class GatherTest : public ::testing::TestWithParam> { } void SetUp() override - { + { raft::random::RngState r(params.seed); raft::random::RngState r_int(params.seed); @@ -172,7 +177,10 @@ class GatherTest : public ::testing::TestWithParam> { } if (Inplace) { - raft::copy_async(d_out_act.data(), d_in.data(), map_length * params.ncols, raft::resource::get_cuda_stream(handle)); + raft::copy_async(d_out_act.data(), + d_in.data(), + map_length * params.ncols, + raft::resource::get_cuda_stream(handle)); } resource::sync_stream(handle, stream); @@ -188,24 +196,23 @@ class GatherTest : public ::testing::TestWithParam> { rmm::device_uvector d_map; }; -#define GATHER_TEST(test_type, test_name, test_inputs) \ - typedef RAFT_DEPAREN(test_type) test_name; \ - TEST_P(test_name, Result) \ - { \ - ASSERT_TRUE(devArrMatch(d_out_exp.data(), \ - d_out_act.data(), \ - d_out_exp.size(), \ - raft::Compare())); \ - } \ +#define GATHER_TEST(test_type, test_name, test_inputs) \ + typedef RAFT_DEPAREN(test_type) test_name; \ + TEST_P(test_name, Result) \ + { \ + ASSERT_TRUE( \ + devArrMatch(d_out_exp.data(), d_out_act.data(), d_out_exp.size(), raft::Compare())); \ + } \ INSTANTIATE_TEST_CASE_P(GatherTests, test_name, ::testing::ValuesIn(test_inputs)) -const std::vector> inputs_i32 = - raft::util::itertools::product>({25, 2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); +const std::vector> inputs_i32 = raft::util::itertools::product>( + {25, 2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); const std::vector> inputs_i64 = raft::util::itertools::product>( {25, 2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); const std::vector> inplace_inputs_i32 = - raft::util::itertools::product>({2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); + raft::util::itertools::product>( + {2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); const std::vector> inplace_inputs_i64 = raft::util::itertools::product>( {2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); @@ -214,7 +221,9 @@ GATHER_TEST((GatherTest), GatherTestF GATHER_TEST((GatherTest), GatherTransformTestFU32I32, inputs_i32); -GATHER_TEST((GatherTest), GatherIfTestFU32I32, inputs_i32); +GATHER_TEST((GatherTest), + GatherIfTestFU32I32, + inputs_i32); GATHER_TEST((GatherTest), GatherIfTransformTestFU32I32, inputs_i32); @@ -227,7 +236,13 @@ GATHER_TEST((GatherTest), GATHER_TEST((GatherTest), GatherIfTransformTestFI64I64, inputs_i64); -GATHER_TEST((GatherTest), GatherInplaceTestFU32I32, inplace_inputs_i32); -GATHER_TEST((GatherTest), GatherInplaceTestFU32I64, inplace_inputs_i64); -GATHER_TEST((GatherTest), GatherInplaceTestFI64I64, inplace_inputs_i64); +GATHER_TEST((GatherTest), + GatherInplaceTestFU32I32, + inplace_inputs_i32); +GATHER_TEST((GatherTest), + GatherInplaceTestFU32I64, + inplace_inputs_i64); +GATHER_TEST((GatherTest), + GatherInplaceTestFI64I64, + inplace_inputs_i64); } // end namespace raft \ No newline at end of file diff --git a/cpp/test/matrix/scatter.cu b/cpp/test/matrix/scatter.cu index 0824633323..4bc09226ae 100644 --- a/cpp/test/matrix/scatter.cu +++ b/cpp/test/matrix/scatter.cu @@ -15,7 +15,6 @@ */ #include "../test_utils.cuh" -#include #include #include #include @@ -24,30 +23,24 @@ #include #include #include +#include #include #include -#include -#include #include +#include +#include namespace raft { -template -void naiveScatter(InputIteratorT in, - IdxT D, - IdxT N, - MapIteratorT map, - IdxT map_length, - OutputIteratorT out) +template +void naiveScatter( + InputIteratorT in, IdxT D, IdxT N, MapIteratorT map, IdxT map_length, OutputIteratorT out) { for (IdxT outRow = 0; outRow < map_length; ++outRow) { typename std::iterator_traits::value_type map_val = map[outRow]; - IdxT outRowStart = map_val * D; - IdxT inRowStart = outRow * D; + IdxT outRowStart = map_val * D; + IdxT inRowStart = outRow * D; for (IdxT i = 0; i < D; ++i) { out[outRowStart + i] = in[inRowStart + i]; } @@ -75,11 +68,11 @@ class ScatterTest : public ::testing::TestWithParam> { } void SetUp() override - { + { raft::random::RngState r(params.seed); raft::random::RngState r_int(params.seed); - IdxT len = params.nrows * params.ncols; + IdxT len = params.nrows * params.ncols; // input matrix setup d_in.resize(params.nrows * params.ncols, stream); @@ -94,7 +87,7 @@ class ScatterTest : public ::testing::TestWithParam> { auto exec_policy = raft::resource::get_thrust_policy(handle); thrust::counting_iterator permute_iter(0); - thrust::copy(exec_policy, permute_iter, permute_iter + params.nrows, d_map.data()); + thrust::copy(exec_policy, permute_iter, permute_iter + params.nrows, d_map.data()); thrust::default_random_engine g; thrust::shuffle(exec_policy, d_map.data(), d_map.data() + params.nrows, g); @@ -107,19 +100,14 @@ class ScatterTest : public ::testing::TestWithParam> { d_out_exp.resize(params.nrows * params.ncols, stream); // launch scatter on the host and copy the results to device - naiveScatter(h_in.data(), - params.ncols, - params.nrows, - h_map.data(), - params.nrows, - h_out.data()); + naiveScatter(h_in.data(), params.ncols, params.nrows, h_map.data(), params.nrows, h_out.data()); raft::update_device(d_out_exp.data(), h_out.data(), params.nrows * params.ncols, stream); auto inout_view = raft::make_device_matrix_view( d_in.data(), params.nrows, params.ncols); auto map_view = raft::make_device_vector_view(d_map.data(), params.nrows); - raft::matrix::scatter(handle, inout_view, map_view, params.col_batch_size); + raft::matrix::scatter(handle, inout_view, map_view, params.col_batch_size); resource::sync_stream(handle, stream); } @@ -133,15 +121,13 @@ class ScatterTest : public ::testing::TestWithParam> { rmm::device_uvector d_map; }; -#define SCATTER_TEST(test_type, test_name, test_inputs) \ - typedef RAFT_DEPAREN(test_type) test_name; \ - TEST_P(test_name, Result) \ - { \ - ASSERT_TRUE(devArrMatch(d_in.data(), \ - d_out_exp.data(), \ - d_out_exp.size(), \ - raft::Compare())); \ - } \ +#define SCATTER_TEST(test_type, test_name, test_inputs) \ + typedef RAFT_DEPAREN(test_type) test_name; \ + TEST_P(test_name, Result) \ + { \ + ASSERT_TRUE( \ + devArrMatch(d_in.data(), d_out_exp.data(), d_out_exp.size(), raft::Compare())); \ + } \ INSTANTIATE_TEST_CASE_P(ScatterTests, test_name, ::testing::ValuesIn(test_inputs)) const std::vector> inputs_i32 = diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index 1a9a99f5eb..b4a2c2b344 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -361,16 +361,15 @@ INSTANTIATE_TEST_CASE_P(ConnectComponentsTest, ConnectComponentsTestF_Int, ::testing::ValuesIn(fix_conn_inputsf2)); - template - struct MutualReachabilityFixConnectivitiesRedOp { +struct MutualReachabilityFixConnectivitiesRedOp { value_t* core_dists; value_idx m; DI MutualReachabilityFixConnectivitiesRedOp() : m(0) {} MutualReachabilityFixConnectivitiesRedOp(value_t* core_dists_, value_idx m_) - : core_dists(core_dists_), m(m_) {}; + : core_dists(core_dists_), m(m_){}; typedef typename raft::KeyValuePair KVP; DI void operator()(value_idx rit, KVP* out, const KVP& other) const @@ -427,16 +426,25 @@ template void gather(const raft::resources& handle, value_idx* map) { auto tmp_core_dists = raft::make_device_vector(handle, m); - thrust::gather(raft::resource::get_thrust_policy(handle), map, map + m, core_dists, tmp_core_dists.data_handle()); - raft::copy_async(core_dists, tmp_core_dists.data_handle(), m, raft::resource::get_cuda_stream(handle)); + thrust::gather(raft::resource::get_thrust_policy(handle), + map, + map + m, + core_dists, + tmp_core_dists.data_handle()); + raft::copy_async( + core_dists, tmp_core_dists.data_handle(), m, raft::resource::get_cuda_stream(handle)); } void scatter(const raft::resources& handle, value_idx* map) { auto tmp_core_dists = raft::make_device_vector(handle, m); - thrust::scatter( - raft::resource::get_thrust_policy(handle), core_dists, core_dists + m, map, tmp_core_dists.data_handle()); - raft::copy_async(core_dists, tmp_core_dists.data_handle(), m, raft::resource::get_cuda_stream(handle)); + thrust::scatter(raft::resource::get_thrust_policy(handle), + core_dists, + core_dists + m, + map, + tmp_core_dists.data_handle()); + raft::copy_async( + core_dists, tmp_core_dists.data_handle(), m, raft::resource::get_cuda_stream(handle)); } }; @@ -462,25 +470,29 @@ class ConnectComponentsEdgesTest auto stream = resource::get_cuda_stream(handle); - params = ::testing::TestWithParam>::GetParam(); + params = ::testing::TestWithParam< + ConnectComponentsMutualReachabilityInputs>::GetParam(); raft::sparse::COO out_edges(resource::get_cuda_stream(handle)); rmm::device_uvector data(params.n_row * params.n_col, resource::get_cuda_stream(handle)); - rmm::device_uvector core_dists(params.n_row, - resource::get_cuda_stream(handle)); - rmm::device_uvector colors(params.n_row, - resource::get_cuda_stream(handle)); + rmm::device_uvector core_dists(params.n_row, resource::get_cuda_stream(handle)); + rmm::device_uvector colors(params.n_row, resource::get_cuda_stream(handle)); raft::copy(data.data(), params.data.data(), data.size(), resource::get_cuda_stream(handle)); - raft::copy(core_dists.data(), params.core_dists.data(), core_dists.size(), resource::get_cuda_stream(handle)); - raft::copy(colors.data(), params.colors.data(), colors.size(), resource::get_cuda_stream(handle)); + raft::copy(core_dists.data(), + params.core_dists.data(), + core_dists.size(), + resource::get_cuda_stream(handle)); + raft::copy( + colors.data(), params.colors.data(), colors.size(), resource::get_cuda_stream(handle)); /** * 3. connect_components to fix connectivities */ - MutualReachabilityFixConnectivitiesRedOp red_op(core_dists.data(), params.n_row); + MutualReachabilityFixConnectivitiesRedOp red_op(core_dists.data(), + params.n_row); raft::linkage::connect_components(handle, out_edges, @@ -492,20 +504,14 @@ class ConnectComponentsEdgesTest params.n_row, params.n_col); - ASSERT_TRUE(devArrMatch(out_edges.rows(), - params.expected_rows.data(), - out_edges.nnz, - Compare())); - - ASSERT_TRUE(devArrMatch(out_edges.cols(), - params.expected_cols.data(), - out_edges.nnz, - Compare())); - - ASSERT_TRUE(devArrMatch(out_edges.vals(), - params.expected_vals.data(), - out_edges.nnz, - CompareApprox(1e-4))); + ASSERT_TRUE( + devArrMatch(out_edges.rows(), params.expected_rows.data(), out_edges.nnz, Compare())); + + ASSERT_TRUE( + devArrMatch(out_edges.cols(), params.expected_cols.data(), out_edges.nnz, Compare())); + + ASSERT_TRUE(devArrMatch( + out_edges.vals(), params.expected_vals.data(), out_edges.nnz, CompareApprox(1e-4))); } void SetUp() override { basicTest(); } @@ -519,27 +525,450 @@ class ConnectComponentsEdgesTest const std::vector> mr_fix_conn_inputsf2 = { {100, 2, - {-7.72642,-8.39496,5.4534,0.742305,-2.97867,9.55685,6.04267,0.571319,-6.52184,-6.31932,3.64934,1.40687,-2.17793,9.98983,4.42021,2.33028,4.73696,2.94181,-3.66019,9.38998,-3.05358,9.12521,-6.65217,-5.57297,-6.35769,-6.58313,-3.61553,7.81808,-1.77073,9.18565,-7.95052,-6.39764,-6.60294,-6.05293,-2.58121,10.0178,-7.76348,-6.72638,-6.40639,-6.95294,-2.97262,8.54856,-6.95673,-6.53896,-7.32614,-6.02371,-2.1478,10.5523,-2.54502,10.5789,-2.96984,10.0714,3.22451,1.55252,-6.25396,-7.73727,-7.85431,-6.09303,-8.11658,-8.20057,-7.55965,-6.64786,4.936,2.23423,4.44752,2.27472,-5.72103,-7.70079,-0.929985,9.78172,-3.10984,8.72259,-2.44167,7.58954,-2.18511,8.6292,5.55528,2.30192,4.73164,-0.0143992,-8.2573,-7.81793,-2.98837,8.82863,4.60517,0.804492,-3.83738,9.21115,-2.62485,8.71318,3.57758,2.44676,-8.48711,-6.69548,-6.70645,-6.49479,-6.86663,-5.42658,3.83139,1.47141,2.02013,2.79507,4.64499,1.73858,-1.69667,10.3705,-6.61974,-6.09829,-6.05757,-4.98332,-7.10309,-6.16611,-3.52203,9.32853,-2.26724,7.10101,6.11777,1.4549,-4.23412,8.452,-6.58655,-7.59446,3.93783,1.64551,-7.12502,-7.63385,2.72111,1.94666,-7.14428,-4.15994,-6.66553,-8.12585,4.70011,4.43641,-7.76914,-7.69592,4.11012,2.48644,4.89743,1.89872,4.29716,1.17089,-6.62913,-6.53366,-8.07093,-6.22356,-2.16558,7.25125,4.73953,1.46969,-5.91625,-6.46733,5.43091,1.06378,-6.82142,-8.02308,6.52606,2.14775,3.08922,2.04173,-2.14756,8.36917,3.85663,1.65111,-1.68665,7.79344,-5.01385,-6.40628,-2.52269,7.95658,-2.30033,7.05462,-1.04355,8.78851,3.72045,3.5231,-3.98772,8.29444,4.24777,0.509655,4.72693,1.67416,5.7827,2.7251,-3.41722,7.60198,5.22674,4.16363,-3.1109,10.8666,-3.18612,9.62596,-1.4782,9.94557,4.47859,2.37722,-5.79658,-5.82631,-3.34842,8.70507}, - {0.978428,1.01917,0.608673,1.45629,0.310713,0.689461,0.701126,0.63296,0.774788,0.701648,0.513282,0.757651,0.45638,0.973111,0.901396,0.613692,0.482497,0.688143,0.72428,0.666345,0.58232,0.554756,0.710315,0.903611,0.694115,0.796099,0.639759,0.798998,0.639839,1.30727,0.663729,0.57476,0.571348,1.14662,1.26518,0.485068,0.78207,0.791621,1.01678,1.28509,1.14715,0.381395,0.850507,0.788511,0.588341,0.878516,0.928669,0.405874,0.776421,0.612274,1.84963,0.57476,0.95226,0.488078,1.24868,0.515136,0.589378,0.903632,1.01678,1.09964,0.666345,0.713265,0.877168,1.10053,1.96887,1.03574,2.03728,0.969553,0.774788,0.586338,0.65168,0.435472,0.664396,0.790584,0.678637,0.715964,0.865494,0.978428,1.59242,0.861109,0.833259,0.65168,0.903632,1.49599,0.76347,0.960453,1.1848,1.37398,0.928957,1.07848,0.661798,1.21104,1.04579,1.89047,1.24288,0.529553,0.903611,0.620897,0.882467,0.647189}, - {0,1,2,1,0,1,2,1,1,2,2,0,0,2,2,0,0,2,0,0,2,0,0,2,2,2,1,0,0,0,0,1,1,0,2,2,2,2,1,1,0,2,1,2,2,1,0,0,0,1,1,1,2,0,0,0,2,2,1,2,0,1,0,1,0,0,1,0,1,1,1,0,0,2,1,0,1,0,1,1,2,1,2,0,2,2,2,1,2,1,1,1,2,1,2,2,2,1,0,2}, - {50,54,57,63,82,87}, - {57,63,50,54,87,82}, - {6.0764,11.1843,6.0764,11.1843,6.89004,6.89004}}, + {-7.72642, -8.39496, 5.4534, 0.742305, -2.97867, 9.55685, 6.04267, 0.571319, -6.52184, + -6.31932, 3.64934, 1.40687, -2.17793, 9.98983, 4.42021, 2.33028, 4.73696, 2.94181, + -3.66019, 9.38998, -3.05358, 9.12521, -6.65217, -5.57297, -6.35769, -6.58313, -3.61553, + 7.81808, -1.77073, 9.18565, -7.95052, -6.39764, -6.60294, -6.05293, -2.58121, 10.0178, + -7.76348, -6.72638, -6.40639, -6.95294, -2.97262, 8.54856, -6.95673, -6.53896, -7.32614, + -6.02371, -2.1478, 10.5523, -2.54502, 10.5789, -2.96984, 10.0714, 3.22451, 1.55252, + -6.25396, -7.73727, -7.85431, -6.09303, -8.11658, -8.20057, -7.55965, -6.64786, 4.936, + 2.23423, 4.44752, 2.27472, -5.72103, -7.70079, -0.929985, 9.78172, -3.10984, 8.72259, + -2.44167, 7.58954, -2.18511, 8.6292, 5.55528, 2.30192, 4.73164, -0.0143992, -8.2573, + -7.81793, -2.98837, 8.82863, 4.60517, 0.804492, -3.83738, 9.21115, -2.62485, 8.71318, + 3.57758, 2.44676, -8.48711, -6.69548, -6.70645, -6.49479, -6.86663, -5.42658, 3.83139, + 1.47141, 2.02013, 2.79507, 4.64499, 1.73858, -1.69667, 10.3705, -6.61974, -6.09829, + -6.05757, -4.98332, -7.10309, -6.16611, -3.52203, 9.32853, -2.26724, 7.10101, 6.11777, + 1.4549, -4.23412, 8.452, -6.58655, -7.59446, 3.93783, 1.64551, -7.12502, -7.63385, + 2.72111, 1.94666, -7.14428, -4.15994, -6.66553, -8.12585, 4.70011, 4.43641, -7.76914, + -7.69592, 4.11012, 2.48644, 4.89743, 1.89872, 4.29716, 1.17089, -6.62913, -6.53366, + -8.07093, -6.22356, -2.16558, 7.25125, 4.73953, 1.46969, -5.91625, -6.46733, 5.43091, + 1.06378, -6.82142, -8.02308, 6.52606, 2.14775, 3.08922, 2.04173, -2.14756, 8.36917, + 3.85663, 1.65111, -1.68665, 7.79344, -5.01385, -6.40628, -2.52269, 7.95658, -2.30033, + 7.05462, -1.04355, 8.78851, 3.72045, 3.5231, -3.98772, 8.29444, 4.24777, 0.509655, + 4.72693, 1.67416, 5.7827, 2.7251, -3.41722, 7.60198, 5.22674, 4.16363, -3.1109, + 10.8666, -3.18612, 9.62596, -1.4782, 9.94557, 4.47859, 2.37722, -5.79658, -5.82631, + -3.34842, 8.70507}, + {0.978428, 1.01917, 0.608673, 1.45629, 0.310713, 0.689461, 0.701126, 0.63296, 0.774788, + 0.701648, 0.513282, 0.757651, 0.45638, 0.973111, 0.901396, 0.613692, 0.482497, 0.688143, + 0.72428, 0.666345, 0.58232, 0.554756, 0.710315, 0.903611, 0.694115, 0.796099, 0.639759, + 0.798998, 0.639839, 1.30727, 0.663729, 0.57476, 0.571348, 1.14662, 1.26518, 0.485068, + 0.78207, 0.791621, 1.01678, 1.28509, 1.14715, 0.381395, 0.850507, 0.788511, 0.588341, + 0.878516, 0.928669, 0.405874, 0.776421, 0.612274, 1.84963, 0.57476, 0.95226, 0.488078, + 1.24868, 0.515136, 0.589378, 0.903632, 1.01678, 1.09964, 0.666345, 0.713265, 0.877168, + 1.10053, 1.96887, 1.03574, 2.03728, 0.969553, 0.774788, 0.586338, 0.65168, 0.435472, + 0.664396, 0.790584, 0.678637, 0.715964, 0.865494, 0.978428, 1.59242, 0.861109, 0.833259, + 0.65168, 0.903632, 1.49599, 0.76347, 0.960453, 1.1848, 1.37398, 0.928957, 1.07848, + 0.661798, 1.21104, 1.04579, 1.89047, 1.24288, 0.529553, 0.903611, 0.620897, 0.882467, + 0.647189}, + {0, 1, 2, 1, 0, 1, 2, 1, 1, 2, 2, 0, 0, 2, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 2, + 2, 1, 0, 0, 0, 0, 1, 1, 0, 2, 2, 2, 2, 1, 1, 0, 2, 1, 2, 2, 1, 0, 0, 0, 1, + 1, 1, 2, 0, 0, 0, 2, 2, 1, 2, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 2, 1, + 0, 1, 0, 1, 1, 2, 1, 2, 0, 2, 2, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 1, 0, 2}, + {50, 54, 57, 63, 82, 87}, + {57, 63, 50, 54, 87, 82}, + {6.0764, 11.1843, 6.0764, 11.1843, 6.89004, 6.89004}}, {1000, 2, - {-6.59634,-7.13901,-6.13753,-6.58082,5.19821,2.04918,-2.96856,8.16444,-2.76879,7.51114,-6.82261,-6.61152,5.02008,2.58376,5.55621,2.31966,4.86379,3.33731,5.84639,1.15623,-2.17159,8.60241,-4.97844,-6.94077,-2.31014,8.41407,5.5582,0.402669,5.25265,0.919754,5.85298,2.11489,-3.29245,8.69222,-1.9621,8.81209,-1.53408,8.86723,-2.18227,8.79519,4.60519,2.20738,-6.4759,-6.9043,-7.18766,-6.10045,-9.00148,-7.48793,4.01674,1.41769,-2.45347,10.1085,-3.20892,9.22827,-3.18612,9.62596,4.81977,3.36517,4.90693,2.8628,-6.44269,-5.68946,-8.30144,-5.37878,4.61485,2.79094,-1.98726,9.31127,-3.66019,9.38998,-6.58607,-8.23669,-7.46015,-6.29153,4.08468,3.85433,-6.36842,-5.50645,-6.83602,-5.18506,-0.627173,10.3597,3.98846,1.48928,-2.9968,8.58173,-7.2144,-7.28376,-0.660242,10.1409,-4.23528,-8.38308,-3.15984,8.52716,-2.40987,9.76567,-8.7548,-6.76508,4.56971,0.312209,-7.5487,-5.8402,-1.6096,9.32159,5.04813,0.270586,-7.6525,-6.47306,-1.79758,7.88964,-9.0153,-3.74236,-3.5715,9.48788,-1.65154,8.85435,-3.47412,9.70034,6.31245,2.39219,4.03851,2.29295,-3.17098,9.86672,-6.90693,-7.81338,-6.22373,-6.68537,-3.22204,9.12072,-0.365254,9.6482,-7.76712,-7.31757,4.15669,3.54716,4.1937,0.083629,-3.03896,9.52755,-6.29293,-7.35501,-2.95926,9.63714,4.02709,1.58547,4.56828,1.93595,5.6242,1.75918,-7.36237,-7.83344,5.32177,3.81988,-2.43183,8.153,-1.97939,10.4559,-3.49492,9.51833,3.39602,1.28026,-2.42215,8.71528,-3.57682,8.87191,-2.77385,11.7345,5.71351,0.946654,-6.50253,-6.90937,4.08239,0.603367,-5.64134,-6.85884,-2.76177,7.7665,-2.25165,8.93984,-3.49071,9.47639,-1.06792,7.57842,5.15754,1.24743,3.63574,1.20537,-6.07969,-8.49642,4.12227,2.19696,-7.17144,-8.4433,-1.92234,11.2047,3.23237,1.19535,3.85389,0.641937,4.82665,1.21779,-7.68923,-6.45605,-7.00816,-8.76196,-5.12894,9.83619,-5.66247,-5.35879,3.05598,2.73358,6.06038,1.40242,-1.69568,7.78342,5.13391,2.23384,-2.96984,10.0714,-5.36618,-6.2493,5.55896,1.6829,3.55882,2.58911,5.36155,0.844118,-0.0634456,9.14351,4.88368,1.40909,-7.04675,-6.59753,-7.78333,-6.55575,5.39881,2.25436,-2.85189,8.64285,-2.22821,8.39159,3.88591,1.69249,-7.55481,-7.02463,4.60032,2.65467,-6.90615,-7.76198,-6.76005,-7.85318,4.15044,3.01733,-7.18884,-7.63227,4.68874,2.01376,3.51716,2.35558,-3.81367,9.68396,4.42644,3.4639,4.81758,0.637825,-6.20705,-4.98023,-1.68603,9.0876,-4.99504,-5.33687,-1.77073,9.18565,4.86433,3.02027,4.20538,1.664,4.59042,2.64799,-3.09856,9.86389,-3.02306,7.95507,-6.32402,-6.79053,-7.67205,-7.18807,-8.10918,-6.38341,-1.67979,6.80315,4.00249,3.16219,-2.54391,7.84561,-3.22764,8.80084,-2.63712,8.05875,-2.41744,7.02672,-6.71117,-5.56251,5.18348,1.60256,-7.40824,-6.29375,-4.22233,10.3682,4.8509,1.87646,-2.99456,9.09616,5.1332,2.15801,-2.27358,9.78515,-6.73874,-8.64855,4.96124,2.39509,-3.70949,8.67978,-4.13674,9.06237,2.80367,2.48116,-0.876786,7.58414,-3.7005,9.67084,6.48652,0.903085,6.28189,2.98299,-6.07922,-6.12582,-5.67921,-7.537,4.55014,3.41329,-1.63688,9.19763,-4.02439,10.3812,5.23053,3.08187,-2.2951,7.76855,-6.24491,-5.77041,6.02415,2.53708,-6.91286,-7.08823,4.83193,1.66405,-7.07454,-5.74634,-2.09576,10.8911,3.29543,1.05452,-3.49973,8.44799,5.2922,0.396778,-2.54502,10.5789,-6.38865,-6.14523,-1.75221,8.09212,-9.30387,-5.99606,-2.98113,10.1032,-6.2017,-7.36802,4.63628,0.814805,-1.81905,8.61307,4.88926,3.55062,3.08325,2.57918,-2.51717,10.4942,-5.75358,-6.9315,6.36742,2.40949,5.74806,0.933264,4.74408,1.91058,-7.41496,-6.97064,-2.98414,8.36096,6.72825,1.83358,-2.95349,9.39159,-3.35599,7.49944,6.18738,3.76905,-3.17182,9.58488,5.17863,1.0525,-3.0397,8.43847,-2.23874,8.96405,3.04689,2.41364,6.14064,2.82339,-6.33334,-6.87369,-7.92444,-8.84647,3.65129,0.86958,5.29842,3.98337,-2.06538,9.78892,-6.89494,-6.30082,-2.52144,8.11703,-8.11398,-7.47257,5.3381,2.36666,-6.93452,-6.59456,-7.50634,-6.01772,6.23438,1.12621,-2.15218,8.32138,-7.04777,-7.3522,-2.52771,8.72563,-2.77907,8.03552,4.29123,1.62391,-8.07551,-6.43551,-3.28202,8.77747,-2.21308,9.27534,-8.25153,-8.49367,-3.54644,8.82395,-8.05867,-5.69243,4.46681,1.98875,3.8362,3.61229,-6.96231,-7.00186,5.18993,1.00483,-5.35116,-6.37227,5.23298,1.66362,-5.68306,-7.03864,-9.03144,-7.59926,-6.10127,-7.4313,4.83572,0.994797,-7.32695,-5.59909,0.569683,10.1339,3.35957,2.84563,-2.4122,9.60944,5.00855,1.57983,-2.57528,7.80327,3.96349,3.77411,4.59429,2.21651,-6.54765,-6.68961,4.76798,1.29212,-1.67351,7.88458,5.63615,1.47941,-2.5301,9.13161,4.26075,1.76959,4.67788,2.0932,4.39955,1.59835,3.91274,1.72565,-4.1786,9.55765,-7.34566,-8.47481,4.8364,2.68217,-7.36848,-7.99973,-5.84708,-5.7534,5.37252,1.89245,-2.1707,8.599,-1.3299,9.0818,-6.79122,-5.40258,5.56391,1.78827,-0.194539,7.14702,4.60489,3.74397,5.50995,2.46885,-3.98772,8.29444,-5.21837,-7.33721,-1.63959,10.3699,-5.92932,-5.1695,-5.88358,-7.6369,4.11716,3.02218,-6.54114,-7.17551,3.97179,2.96521,-6.75325,-4.94118,5.26169,0.402945,3.25031,0.327771,-0.44845,10.7696,-2.15141,9.57507,7.04329,1.91555,-3.74615,7.69383,-7.52318,-5.85015,-6.80419,-8.48208,-4.57664,8.92517,4.57574,2.30193,4.84098,3.02382,-9.43355,-5.94579,-3.52203,9.32853,3.43018,2.5731,-6.15725,-7.25294,-6.69861,-8.17694,-2.40955,8.51081,-4.82342,-7.98332,-7.10611,-6.51274,5.86755,0.763529,-6.56045,-5.53966,-3.61553,7.81808,4.3825,0.304586,-6.52818,-5.80996,4.59972,0.542395,-6.90603,-6.59995,-6.3585,-6.23489,-6.01915,-7.46319,-5.38694,-7.15123,-7.83475,-6.45651,5.89564,1.07856,-5.15266,-7.27975,-6.97978,-7.08378,5.83493,0.449983,-2.62374,10.2521,-7.34494,-6.98606,-6.79719,-8.33766,3.54757,1.65676,-8.40528,-5.61753,-5.85556,-6.28758,4.66862,3.25162,-6.26047,-4.82261,4.61552,4.11544,-1.36637,9.76622,4.2517,2.14359,-2.45099,7.87132,-0.376164,7.0622,4.34493,3.22091,6.95921,2.36649,-6.70319,-7.24714,-5.56932,-5.48443,-7.43149,-4.32191,-3.23956,9.23074,-5.77255,-7.00049,4.96601,0.722056,-7.88617,-5.74023,4.18757,-0.45071,-7.12569,-7.72336,5.27366,2.38697,3.93487,1.9174,3.19186,-0.225636,-3.41722,7.60198,-3.08286,8.46743,-5.87905,-7.55073,-5.26425,-7.20243,-2.97867,9.55685,-1.23153,8.42272,-2.33602,9.3996,-3.33819,8.45411,-3.58009,9.49676,3.78152,2.67348,-1.54582,9.42707,-4.04331,10.292,3.3452,3.134,-2.75494,8.74156,-3.26555,7.59203,-7.27139,-7.80252,3.5293,3.72544,6.11642,3.35326,4.01611,3.8872,4.89591,2.95586,-7.06677,-5.89438,4.19438,3.42655,-6.11355,-5.65318,-7.59645,-8.74665,-5.80362,-6.8588,3.80453,4.11832,5.70655,3.14247,-4.98084,8.21739,-1.87642,11.285,4.39864,2.32523,-3.48388,9.80137,4.02836,0.566509,-2.41212,9.98293,-5.40846,-7.08943,4.01506,1.99926,-3.43613,8.95476,-7.24458,-7.71932,6.02204,2.62188,-6.29999,-6.55431,6.19038,0.974816,3.55882,3.02632,-7.06011,-3.687,-1.55877,8.43738,-5.14711,-4.64881,4.7167,0.690177,-7.90381,-5.02602,4.17218,2.31967,-0.643423,9.48812,-7.95237,-6.64086,-4.05986,9.08285,-6.24158,-6.37927,-6.6105,-7.2233,-6.21675,-5.70664,-3.29967,9.48575,3.41775,2.68617,-2.24948,8.10997,-2.24931,9.79611,-9.0523,-6.03269,-2.2587,9.36073,5.20965,2.42088,-3.10159,8.1503,-6.67906,-5.73147,4.0687,2.54575,-1.24229,8.30662,-2.09627,8.45056,-7.87801,-6.57832,4.72216,3.03865,-0.929985,9.78172,-8.56307,-7.68598,-7.05257,-5.1684,-7.09076,-7.86729,4.61432,3.1459,-6.34133,-5.8076,-3.82943,10.8457,-8.46082,-5.98507,5.34763,1.4107,-1.68714,10.9111,-1.67886,8.1582,-0.623012,9.18886,-4.21258,8.95874,-2.16744,10.8905,-6.57158,-7.27176,2.14047,4.26411,-8.44217,-7.40916,5.29008,1.87399,4.31824,4.04992,-3.77008,9.93215,-2.72688,10.1131,-6.14278,-7.16144,-3.92457,8.59364,-5.92649,-6.59299,4.68369,1.82617,-6.89905,-7.18329,3.95173,4.22561,-7.66453,-6.23183,-2.44167,7.58954,-6.36603,-7.41281,-6.45081,-6.187,-6.6125,-6.37138,5.46036,2.48044,-2.14756,8.36917,-2.3889,9.52872,3.80752,2.44459,-3.98778,10.158,-6.63887,-4.27843,-8.65266,-5.61819,-7.97003,-5.46918,-5.9604,-7.54825,-0.916011,8.50307,-3.69246,6.97505,-7.98533,-7.09503,-2.30033,7.05462,4.76218,2.51647,-7.04981,-7.33334,3.66401,3.02681,-2.50408,8.7797,7.19996,1.87711,4.01291,3.78562,-0.356015,8.24694,-0.958046,9.12996,4.60675,3.76773,6.21945,1.45031,4.27744,0.8535,-4.72232,-7.48582,6.03923,2.8978,-3.26833,9.16468,-7.97059,-7.29092,-2.3998,9.74005,-2.66721,8.58741,-7.36269,-6.73332,-7.87893,-7.38488,4.65023,0.661333,-4.8171,-7.94764,-4.11564,9.21775,4.80633,2.46562,-2.72887,9.3714,-5.26735,-5.5652,4.9826,2.42992,-6.17018,-7.3156,4.38084,1.77682,5.35084,2.41743,-2.61796,9.416,5.27229,2.94572,-7.52315,-5.95227,-1.45077,7.25555,-3.79916,7.71921,-2.23251,9.84147,3.70054,1.82908,-1.93831,10.1499,-6.18324,-5.9248,-3.33142,9.25797,-6.08536,-8.1344,5.95727,2.17077,4.87366,0.417274,-6.529,-6.39092,-9.24256,-7.88984,-6.36652,-7.13966,-3.90777,9.57726,-7.06252,-5.50523,-2.26423,8.50734,-2.84498,10.6833,5.0391,2.62037,-2.74815,8.10672,3.35945,3.72796,-4.11668,9.19892,5.66903,2.44577,-1.63807,8.68826,-7.42587,-6.48831,6.17063,3.19193,-2.28511,9.02688,-7.10088,-7.15692,4.46293,1.17487,-5.91017,-6.45292,-2.26724,7.10101,-2.43339,8.33712,-4.63309,8.48853,-3.31769,8.51253,-2.49078,10.6907,-1.30798,8.60621,6.30535,2.98754,-5.79384,-6.78213,-1.93213,8.81124,4.55773,3.09047,6.37584,2.17108,4.3927,1.29119,-3.2245,9.69388,-1.69634,9.64392,2.799,0.693593,-2.1426,8.07441,-8.4505,-8.00688,4.736,1.51089,-2.5863,9.35544,-2.94924,9.14503,6.2054,1.90742,5.67172,0.487609,-5.69071,-6.17181,-8.24651,-7.10488,-7.34424,-6.67895,-6.71977,-7.90778,-1.82294,7.40157,-9.40991,-7.16611,-4.37999,8.66277,-1.42615,10.0681,-2.00828,8.03673,-7.50228,-6.6855,-5.65859,-6.29801,-8.02335,-6.77155,-3.40761,9.50621,-2.82447,9.77326,-1.5938,9.34304,-3.5213,7.35943,-3.36961,8.62973,-7.01708,-5.92724,5.20886,3.60157,-1.71817,8.1049,-2.46363,8.36269,-2.77809,7.90776,-2.75459,8.26055,-2.03596,8.94146,-4.53434,9.20074,-7.44387,-6.69556,-6.90099,-7.62732,3.29169,2.71643,6.08686,2.16972,-2.31111,8.86993,-5.75046,7.9899,4.69951,1.32623,4.71851,-0.025031,-6.42374,-4.71511,-8.04974,-8.68209,-3.16103,9.06168,-6.18267,-7.21393,-7.94202,-6.4518,-7.07697,-7.03138,3.93554,0.564708,-1.20372,9.03529,-7.10611,-7.83955,-7.47529,-5.50567,-6.15453,-6.36393,-2.98024,9.24634,-7.75761,-7.70699,-3.08597,9.76968,-8.04954,-9.75237,5.2534,0.950377,5.63789,-0.923086,-5.7065,-6.51047,-8.02132,-7.07377,-8.28594,-6.96322,-7.70722,-6.79397,-2.4962,10.4678,5.02846,4.46617,4.02648,1.6707,-0.319395,8.20599,4.74525,0.639144,-1.0313,8.49602,4.08766,2.6061,3.63826,1.69207,2.55795,3.66963,5.2826,3.30232,-1.04355,8.78851,-6.84762,-7.63353,-4.70868,-7.056,3.53651,-0.179721,-3.38482,7.63149,-5.9265,-6.36702,-0.986074,9.5532,-2.42261,8.85861,-7.42835,-6.78726,-4.02857,8.53005,-8.22675,-7.85172,-5.57529,-8.5426,6.03009,2.53098,-7.10448,-7.53011,-3.4988,8.8885,-2.62485,8.71318,-6.39489,-7.72647,3.93789,1.31027,4.27627,1.91622,-0.923181,7.77647,-5.16017,10.1058,-6.44307,-5.97617,-7.24495,-6.69543,6.27331,0.826824,-6.55655,-7.13246,5.66245,4.41292,-2.13805,8.4103,5.23463,2.82659,-4.86624,-6.74357,-6.14082,-6.26474,-2.67048,9.41834,-1.26311,6.9409,-7.20231,-7.13094,-1.35109,9.80595,3.9906,0.749229,-6.75696,-5.25543,4.84826,-0.0685652,-7.4914,-6.91715,4.46725,2.85683,-2.95571,9.87068,6.32381,1.51429,-6.81177,-6.02734,-2.57188,9.96943,-4.28792,10.5103,3.65025,2.91394,-7.11856,-7.24693,-6.98693,-6.43239,4.7651,1.54376,4.00092,0.65008,-7.14816,-7.7713,-7.58803,-8.39382,4.3321,2.19232,-7.89545,-6.81843,-2.11475,8.5933,-0.743743,9.41927,3.64849,-0.18022,-1.68665,7.79344,4.00214,1.44217,-6.96799,-7.25012,-1.58302,10.9237,-6.68524,-7.23328,4.65831,2.32075,4.62024,2.52566,-4.23412,8.452,-0.822056,9.89593,-7.19868,-7.67614,-3.32742,11.1067,5.27861,0.830165,4.48982,2.09875,-6.58087,-7.6319,-0.880582,7.63418,-7.01088,-6.80326,-7.31601,-6.98972,-6.85883,-7.60811,6.14328,2.85053,-7.49206,-6.51861,-2.28174,10.3214,4.81074,1.78919,-5.58987,-6.20693,4.08096,2.35038,-1.5029,8.43739,4.11536,2.46254,-3.28299,7.76963,4.31953,2.39734,4.91146,0.696421,-1.4782,9.94557,-3.34842,8.70507,-6.97822,-6.86126,4.10012,1.19486,-2.50395,9.06127,4.41891,2.00006,-2.73266,9.72829,3.5436,0.533119,5.78864,0.233456,-6.62589,-6.41242,-2.21942,11.0897,-6.76636,-8.31839,-2.71732,8.52129,-5.20972,-6.48544,3.26056,1.24224,3.45228,2.28299,4.72171,1.87428,-7.52585,-5.1048,5.0695,2.18086,-6.55646,-7.02771,3.23727,3.72275,3.41411,0.508795,-7.80698,-6.64174,-5.90443,-6.37902,-0.387041,10.0468,-1.3506,8.1936,-6.08614,-8.62864,-5.91478,-5.26453,-2.61623,7.97904,4.45459,1.84335,-6.66643,-7.63208,3.6729,1.92546,-1.32976,8.54511,6.31758,1.41958,4.63381,2.81166,-7.01394,-6.0693,-2.7786,9.73183,-2.90131,7.55077,-7.13842,-5.28146,6.71514,1.28398,-6.98408,-7.04893,-3.03946,8.22141,-2.76417,10.5183,-7.35347,-6.89456,4.19345,2.16726,-2.02819,9.23817,4.97076,2.8067,-0.544473,9.04955,4.90727,2.29487,-6.31871,-7.17559,3.71665,0.621485,4.7903,2.33813,-6.47994,-7.53147,-6.80958,-5.71823,-8.07326,-5.96096,4.77342,1.8207,5.71856,1.93466,-2.70156,9.31583,-2.1478,10.5523,4.78855,1.63608,5.53507,2.60834,-7.00058,-6.46058,5.4738,2.43235,-1.34603,9.02452,-7.5337,-8.71074,-7.30893,-7.57253,-5.33752,-4.87402,-7.01364,-6.86542,-7.93331,-7.94791,-5.69392,-6.16116,-7.32291,-7.76491,-6.41965,-7.55783,-7.87996,-7.55785,-6.69005,-5.87906,3.92147,2.86809,-1.5552,9.66568,5.07989,1.47112,-7.48524,-5.0541,-1.82724,8.70402,-2.00421,9.88004,-2.62153,8.79332,-7.52111,-6.44819,4.06424,2.09518,-6.65494,-5.94752,6.93878,1.61033,-3.95728,7.60682,5.67016,2.21196,-7.81507,-5.79413,-2.41152,8.24128,-3.83738,9.21115,4.5516,4.55288,-5.75551,-5.93258,4.56545,2.59384,-7.45614,-9.47115,-2.39568,9.67642,5.57816,1.45712,-7.48184,-6.41134,-1.99415,12.867,-8.35854,-6.69675,-7.52559,-7.6793,5.7454,3.1602,2.94692,1.87483,-8.77324,-6.66682,-3.21125,8.68662,-6.25806,-7.24972,5.17639,1.0747,-2.44897,11.4775,-3.30172,8.89955,-2.85191,8.21201,-8.85893,-6.1322,4.08957,1.30155,-5.88132,-7.31173,-7.10309,-7.22943,-2.46068,8.18334,-7.01226,-7.85464,4.75411,2.12347,-3.42862,10.5642,7.16681,1.4423,5.42568,2.39863,-6.00833,-8.22609,-1.7619,9.62466,-2.49527,8.99016,-2.98837,8.82863,-2.97262,8.54856,-1.34142,9.26871,-5.99652,-6.95795,-1.87061,7.35277,-8.68277,-8.46425,-7.01808,-8.10441,-7.04269,-7.62501,-7.69783,-6.88348,-2.19829,10.4896,4.67396,1.2032,-5.58263,-6.90298,-5.69224,-4.29055,4.77285,1.27305,-3.33469,8.6929,-2.54195,8.47086,4.46492,1.21742,5.41158,-0.875373,-8.68069,-7.42278,-3.88687,8.07646,4.6682,2.00293,-8.29799,-8.64092,-1.86382,10.3829,-6.51234,-5.04193,4.54458,2.25219,-1.93264,9.32554,-3.06285,7.81641,-6.90714,-5.10786,4.69653,2.50286,6.43757,2.61401,-1.85483,8.9587,4.60224,3.07647,4.4492,2.1906,5.02181,2.40321,-2.22923,7.8888,5.68943,1.43793,-6.71097,-6.43817,-5.00633,-5.80006,-2.43763,8.53663,5.72577,2.44787,-6.57079,-5.17789,-5.77867,-4.92176,-6.57222,-6.06437,3.96639,2.25216,-7.95177,-9.80146,4.92574,2.30763,-7.6221,-8.20013,-6.4132,-6.91575,4.01432,2.36897,3.0833,1.54505,-1.99416,9.52807,-7.85128,-8.25973,-0.86423,8.76525,-6.31412,-8.64087,-8.07355,-6.73717,-2.52821,8.01176,-5.82357,-6.65687,-7.08865,-7.73063,-5.56251,-6.99818,-2.12513,8.98159,-6.89834,-7.26863,-7.92654,-6.34346,4.86201,1.49442,4.92905,4.42847,-5.57789,-5.3186,4.34232,3.34888,2.64614,2.34723,-4.10363,8.41491,-2.18648,8.18706,-3.39871,8.19848,-2.66098,9.6026,-6.95927,-6.42774,-5.61392,-7.74628,5.60376,4.18369,5.28536,4.13642,4.8428,0.457426,-6.33816,-6.12095,-2.4394,8.62897,4.56938,2.45967,4.0582,0.958413,5.62164,1.64834,5.73119,2.58231,4.66806,1.96405,-6.71905,-6.87706,-2.18503,8.88414,-6.03901,-6.33338,-8.38435,-6.12005,0.0641622,9.0735,5.19967,3.05395,-5.48716,-7.13016,-6.85541,-5.46789,-1.88353,8.15713,4.27891,3.1325,-2.75816,9.98586,-2.03022,9.34795,-7.66741,-7.50096,-3.39305,9.16801,-8.49476,-5.71537,-1.68378,9.8278,-7.41559,-6.07205,-3.15577,7.93274,5.22381,1.61388,3.65739,1.74854,4.94251,1.21889,-7.12832,-5.27276,-9.58286,-6.20223,-2.21613,8.29993,5.34799,2.92987,4.09496,2.37231,-7.25183,-5.79136,-6.46981,-7.12137,-6.28607,-9.8205,4.52865,1.06926,-3.10984,8.72259,3.61865,2.68153,-5.96604,-7.68329,3.11435,1.28126,-1.1064,7.61243,-2.17688,8.2658,-3.27246,7.2094,-5.55143,-6.32388,-1.69667,10.3705,-2.16558,7.25125,-6.36572,-6.70053,4.12259,3.38252,-4.80554,-7.79949,-5.23966,-6.13798,4.21969,1.69139,-1.98985,10.547,-2.52269,7.95658,-6.75642,-6.32862,-3.51521,7.8001,4.70435,-0.00229688,6.25359,2.4267,5.82935,0.745562,5.24778,2.15978,5.48052,1.32055,-3.05358,9.12521,-3.18922,9.24654,4.47276,2.11988,5.36751,2.02512,-2.18511,8.6292,-2.48469,9.51228,5.57556,3.24472,-2.58121,10.0178,-6.12629,-6.49895,-4.54732,8.0062,-4.20166,10.5438,-7.61422,-7.69036,-4.42797,8.98777,4.45301,1.53344,4.59296,2.45021,-6.81264,-6.36417,4.62346,3.16156,-5.93007,-8.36501,-2.78425,6.71237,-6.17141,-6.64689,-5.20608,8.95999,-7.30598,-5.73166,4.39572,2.93726,-1.89503,9.77179,-5.683,-7.48989,4.80924,0.559455,-2.17793,9.98983,5.23728,2.67434,-7.03976,-6.20877,3.90435,3.20926,-7.78536,-7.53388,-1.00684,9.08838,-5.26741,-5.98327,3.28002,2.71942,-1.47166,8.50427,-2.32733,9.26251,5.16271,1.39947,-6.59093,-6.61979,-2.44492,7.93654,-1.05805,9.97356,-3.1109,10.8666,3.38834,3.41693,4.83098,2.01961,-2.74013,9.71049,-3.34892,8.41489,4.94768,0.263001,3.57477,1.66795,5.78915,1.26999,-4.81812,-5.67174,-1.88508,9.64263,3.69048,4.60555,4.03037,1.7862,-7.4418,-7.08933}, - {0.127717,0.211407,0.195547,0.21633,0.39671,0.229008,0.20839,0.169236,0.314314,0.322473,0.169506,0.45499,0.147819,0.296502,0.15198,0.356444,0.0992833,0.220833,0.296206,0.178067,0.135359,0.189725,0.243099,0.519986,0.168105,0.273465,0.126033,0.18045,0.282832,0.193901,0.213704,0.425046,0.203191,0.228674,0.209267,0.355039,0.212918,0.315495,0.294112,0.257576,0.5786,0.186019,0.171919,0.171919,0.449151,1.34947,0.171919,0.16341,0.641387,0.342115,0.267343,0.246125,0.277612,0.181462,0.22944,1.95598,0.164897,0.235803,0.228273,0.314629,0.127403,0.241241,0.189362,0.151691,0.130085,0.526707,0.217069,0.282306,0.531523,0.177035,0.169776,0.20395,0.177165,0.146628,0.280013,0.223033,0.50947,0.184133,0.295329,0.183219,0.28166,0.179348,0.276462,1.00283,0.248147,0.214453,0.231732,0.170672,0.256893,0.133271,0.151137,0.500823,0.23678,0.376983,0.362061,0.140013,0.388863,0.398552,0.38015,0.190081,0.167115,0.206884,0.473849,1.05117,0.435665,0.323618,0.326201,0.32226,0.201787,0.246496,0.28325,0.226596,0.238153,0.277268,0.674629,0.179433,0.175651,0.154778,0.178195,0.192796,0.103571,0.227621,0.201124,0.160525,0.160964,0.240099,0.258027,0.134127,0.127717,0.341378,0.311595,0.282306,0.168988,0.40775,0.246125,0.583131,0.236804,0.238633,0.194824,0.169315,0.244227,0.249511,0.189725,0.305662,0.301415,0.658641,0.250944,0.151792,0.141383,0.143843,0.563347,0.184216,0.204155,0.221764,0.314908,0.144518,0.228808,0.255785,0.163457,0.424705,0.170202,0.312598,0.300629,0.532614,0.661392,0.228273,0.543432,0.257175,0.258994,0.281413,0.273897,0.246837,0.293489,0.25533,0.260492,0.213704,0.3091,0.17103,0.172285,0.241399,0.35999,0.372243,0.269191,0.390239,0.31761,0.200593,0.22197,0.752914,0.266571,0.13102,0.268659,0.293723,0.356294,0.296258,0.264531,0.15468,0.358535,0.243711,0.112147,0.121659,0.197101,0.515292,0.245628,0.279863,0.789807,0.195156,0.196073,0.149564,0.118675,0.389373,0.233821,0.176128,0.481088,0.360027,0.553152,0.208207,0.171608,0.160489,0.334298,0.139426,0.168603,0.266199,0.326458,0.103571,0.171208,0.130961,0.190887,0.177229,0.241651,0.115152,0.196753,0.481088,0.230965,0.354631,0.14591,0.328543,0.141544,0.195888,0.290379,0.245954,0.184547,0.575214,0.186929,0.28527,0.292213,1.20033,0.281528,0.15625,0.211524,0.186398,0.298061,0.147393,0.245349,0.164527,0.224771,0.222382,0.251643,0.148835,0.135359,0.204967,0.193024,0.486309,0.389686,0.211921,0.307405,0.38666,0.26802,0.16605,0.323134,0.268397,0.217894,0.974118,0.371618,0.156201,0.305787,0.339305,0.371032,0.381765,0.22747,0.24906,0.100884,0.253192,0.314253,0.388289,0.580947,1.00267,0.241998,0.489101,0.341501,0.247423,0.328311,0.440281,0.14927,0.244469,0.846828,0.191725,0.217429,0.123403,0.322875,0.145373,0.757259,0.190086,0.316286,0.268397,0.296721,0.440472,0.186848,0.232134,0.180239,0.219724,0.205886,0.250975,0.145636,0.312476,0.366418,0.128135,0.315235,0.264531,0.161815,0.31631,0.296489,0.37171,0.197217,0.195625,0.479579,0.443037,0.323347,0.193616,0.160251,0.8952,0.256291,0.593345,0.177165,0.409514,0.847863,0.111448,0.210031,0.251347,0.351953,0.705204,0.117901,0.182343,0.230179,0.83632,0.22104,0.145163,0.200326,0.23431,0.21868,0.253575,0.186562,0.192757,0.172716,0.27396,0.258581,0.327892,0.376138,0.223477,0.302375,0.145845,0.436902,0.421794,0.328543,0.19246,0.238889,0.254866,0.284674,0.457849,0.202937,0.392568,0.453083,0.782713,0.465401,0.178623,0.304863,0.190081,0.228641,0.255135,0.245037,0.217526,0.109584,0.276462,0.182301,0.38582,0.349942,1.3889,0.30235,0.796353,0.160168,0.643204,0.153752,0.410268,0.186439,0.256834,0.185783,0.0957629,0.226596,0.197951,0.17123,0.192836,0.18405,0.575784,0.228874,0.201787,0.241209,0.217386,0.195751,0.291585,0.144531,0.14176,0.157635,0.410268,0.476338,0.308148,0.148077,0.152093,0.196791,0.568087,0.414026,0.250587,0.473463,0.293645,0.396768,0.2766,0.38664,0.135034,1.50827,0.472527,0.268418,0.40383,0.375914,0.246496,0.176474,0.340405,0.220833,0.138782,0.159009,0.444219,0.259582,0.33638,0.195586,0.210974,0.200288,0.148129,0.0974216,0.211588,0.280081,0.44113,0.773921,0.553848,0.448079,0.183136,0.380854,0.685021,0.308767,0.553276,0.181578,0.164759,0.313889,0.137886,0.545387,0.278449,0.736895,0.360054,0.358929,0.457315,0.343278,0.507662,0.280829,0.113886,0.23146,0.160584,0.192796,0.147561,0.241272,0.168988,0.730511,0.27836,0.179847,0.22555,0.418069,0.158348,0.128965,0.179454,0.126366,0.164434,0.273633,0.309556,0.500823,0.367852,0.192875,0.230262,0.32724,0.249969,0.142618,0.494229,0.36108,0.227931,0.23113,0.742825,0.190126,0.33741,0.280598,0.145268,0.378423,0.211921,0.183594,0.59201,0.279563,0.195683,0.248101,0.199754,0.342494,0.174343,0.14149,0.28085,0.175781,0.518738,0.17223,0.489904,0.181167,0.354286,0.297824,0.280829,0.219412,0.22814,0.195625,0.313949,0.294708,0.211551,0.236255,0.666933,0.204808,0.52591,0.180725,0.186889,0.246589,0.410575,0.338348,0.206219,0.361766,0.158143,0.280816,0.4149,0.773082,0.340046,0.369672,0.256923,0.167195,0.197217,0.252339,0.172716,0.191526,0.263085,0.345698,0.168286,0.243099,0.434631,0.22944,0.161862,0.206589,0.23457,0.181924,0.419063,0.183427,0.186152,0.236352,0.306336,0.149002,1.50086,0.188231,0.442757,0.485602,0.466662,0.17329,0.141329,0.180619,0.160061,0.192569,0.270999,0.117901,0.362693,0.217561,0.208975,0.233658,0.175173,1.10307,0.14625,1.31124,0.237608,0.286784,0.325112,0.2485,0.259641,0.553152,0.179039,0.780781,0.174758,0.297824,0.2558,0.235949,0.952186,0.356744,0.312646,0.189362,0.574524,0.705204,0.213168,0.225956,0.424165,0.169506,0.137109,0.352451,0.454554,0.653302,0.31261,0.194412,0.23719,0.137886,0.31498,0.199085,0.203875,0.597248,1.10036,0.196869,0.22104,0.451345,0.105613,0.683928,0.135204,0.25533,0.607871,0.219724,0.184464,0.725001,0.160061,0.333407,0.192569,0.234147,0.47178,0.161815,0.242455,0.215305,0.410575,0.242376,0.211335,0.462804,0.275065,0.126878,0.170404,0.179433,0.147244,0.109584,0.352905,0.158215,0.197604,0.172407,0.407506,0.645446,0.313061,0.165602,0.136663,0.55444,0.15527,0.133128,0.125912,0.340405,0.44521,0.122783,0.814526,0.243773,0.15743,0.266743,0.684458,0.22221,0.181294,0.193901,0.258802,0.167195,0.292056,0.132309,0.227671,0.117334,0.271758,0.146185,0.225042,0.225964,0.194863,0.290274,0.138438,0.196714,0.266012,0.267771,0.162544,0.244258,0.358038,0.522617,0.192875,0.45066,0.330396,0.223477,0.42967,0.350884,0.404655,0.123155,0.431583,0.191675,0.147354,0.609034,0.459487,0.187337,0.215128,0.604169,0.330165,0.494229,0.40775,0.167377,0.192648,0.234635,0.275578,0.253094,0.420063,0.228299,0.206478,0.20395,0.377656,0.317393,0.478623,0.159009,0.217034,0.300933,0.139754,0.153901,0.261077,0.22834,0.449609,0.157672,0.176474,0.285704,0.180186,0.212738,0.266428,0.388313,0.0954637,0.298093,0.251643,0.330696,0.159572,0.210666,0.149411,0.139618,0.338472,0.450304,0.208793,0.583609,0.185865,0.400576,0.21626,0.174867,0.239144,0.249113,0.200402,0.275065,0.238793,0.205784,0.4475,0.231262,0.259082,0.20934,0.16806,0.193616,0.213811,0.395632,0.482465,0.274649,0.307405,0.165866,0.334275,0.683337,0.368825,0.14625,0.780742,0.163457,0.226596,0.138713,1.79155,0.400443,0.233658,0.426399,0.623024,0.670955,0.123588,0.110899,0.173751,0.651068,0.199983,0.190887,0.541435,0.21324,0.266571,0.134638,0.179348,0.145636,0.170929,0.623252,0.587738,0.109688,0.515314,0.217666,0.213311,0.249144,0.187947,0.270999,0.268311,0.469782,0.763609,0.32124,0.146315,0.265223,0.298694,0.197623,0.21349,0.845778,0.175466,0.123588,0.17223,0.258603,1.17119,0.538142,0.407675,0.120288,0.587238,0.244664,0.333956,0.132812,0.21399,0.302375,0.275882,0.134284,0.377555,0.228541,0.187307,0.143804,0.180545,0.222451,0.239638,0.188028,0.46334,0.175868,0.242392,0.314762,0.44473,0.21962,0.175966,1.12364,0.138837,0.400576,0.18184,0.137706,0.409763,0.216894,0.466662,0.376604,0.487155,0.283143,0.118547,0.221591,0.122783,0.179007,0.16628,0.180999,0.239845,0.169607,0.578402,0.396537,0.222288,0.563237,0.371238,0.138658,0.324336,0.191526,0.168603,0.357715,0.640905,0.460706,0.220902,0.240797,0.164062,0.157853,0.34457,0.196092,0.289353,0.104597,0.259641,0.126878,0.175781,0.441458,0.820108,0.261864,0.23431,0.254506,0.271955,0.227529,0.22834,0.196753,0.224906,0.193783,0.419481,0.236933,0.229706,0.29785,0.222947,0.177606,0.216911,0.305188,0.933438,0.116666,0.278483,0.0973824,0.271224,0.127717,1.28139,0.276283,0.180704,0.234554,0.285984,0.290172,0.49594,0.135879,0.436784,0.206219,0.342215,0.374165,0.182217,0.274864,0.625,0.356925,0.194324,0.342215,0.113012,0.155123,0.254207,0.438919,0.262548,0.302299,0.179528,0.312744,0.168513,0.142618,0.150543,0.231361,0.166004,0.186725,0.38848,0.179857,0.182301,0.629476,0.44113,0.289669,0.328543,0.279938,0.14625,0.187174,0.157635,0.396749,0.798931,0.201541,0.778619,0.265883,0.258027,0.218576,0.266571,0.160168,0.230303,0.273633,0.233298,0.30175,0.217069,0.345145,0.397901,0.224499,0.248101,0.241335,0.222947,0.237094,0.176518,0.380032,0.634775,0.426193,0.16362,0.231097,0.219898,0.343789,0.275578,0.282022,0.628542,0.232184,0.848367,0.200754,0.179177}, - {0,0,2,3,3,0,2,2,2,2,3,0,3,2,2,2,3,3,3,3,2,0,0,0,2,3,3,3,2,2,0,0,2,3,3,0,0,2,0,0,3,2,3,0,3,0,3,3,0,2,0,3,2,0,3,0,3,3,3,2,2,3,0,0,3,3,0,2,2,3,0,3,2,2,2,0,2,3,3,3,2,3,3,3,2,0,2,0,3,3,3,3,2,2,0,2,0,3,2,2,2,0,0,3,0,2,2,3,2,3,0,2,2,2,3,2,0,0,2,3,3,2,0,2,0,0,2,0,2,2,3,2,2,0,3,0,3,2,2,2,3,3,0,0,0,3,2,3,3,3,3,0,2,0,3,2,3,2,3,0,2,3,3,2,3,3,2,2,0,0,2,3,3,2,3,0,2,0,2,0,3,2,3,2,3,0,3,0,3,0,2,3,2,2,3,0,2,2,2,0,3,2,3,3,2,3,2,3,3,2,2,0,0,2,2,3,0,3,0,2,0,0,2,3,0,3,3,2,0,3,3,0,3,0,2,2,0,2,0,2,0,0,0,2,0,3,2,3,2,3,2,2,0,2,3,2,3,2,2,2,2,3,0,2,0,0,2,3,3,0,2,3,2,2,3,0,3,0,0,2,0,2,0,2,2,3,3,2,3,0,0,3,2,2,0,3,2,0,0,3,0,0,2,0,3,2,0,2,0,0,0,0,0,2,0,0,2,3,0,0,2,0,0,2,0,2,3,2,3,3,2,2,0,0,0,3,0,2,0,2,0,2,2,2,3,3,0,0,3,3,3,3,3,2,3,3,2,3,3,0,2,2,2,2,0,2,0,0,0,2,2,3,3,2,3,2,3,0,2,3,0,2,0,2,2,0,3,0,2,0,2,3,0,3,0,0,0,3,2,3,3,0,3,2,3,0,2,3,3,0,2,3,0,0,0,2,0,3,0,2,3,3,3,3,3,0,2,0,2,2,3,3,0,3,0,2,0,2,0,3,0,0,0,2,3,3,2,3,0,0,0,0,3,3,0,3,2,0,2,3,2,2,3,3,2,2,2,0,2,3,0,3,3,0,0,2,0,3,2,3,0,2,0,2,2,3,2,0,3,3,3,2,3,0,3,0,2,2,0,0,0,3,0,3,3,2,3,2,3,2,3,0,2,3,0,2,0,3,3,3,3,3,3,2,0,3,2,2,2,3,3,2,3,0,2,3,3,2,2,0,0,0,0,3,0,3,3,3,0,0,0,3,3,3,3,3,0,2,3,3,3,3,3,3,0,0,2,2,3,3,2,2,0,0,3,0,0,0,2,3,0,0,0,3,0,3,0,2,2,0,0,0,0,3,2,2,3,2,3,2,2,2,2,3,0,0,2,3,0,3,3,0,3,0,0,2,0,3,3,0,2,2,3,3,0,0,2,0,2,3,2,0,0,3,3,0,3,2,0,2,0,2,3,2,0,3,3,2,0,0,2,2,0,0,2,0,3,3,2,3,2,0,3,0,2,2,3,3,0,3,2,2,0,3,0,0,0,2,0,3,2,0,2,3,2,3,2,2,3,3,0,2,3,2,3,2,2,0,3,0,3,0,2,2,2,0,2,0,2,2,0,0,3,3,0,0,3,2,0,2,3,2,2,0,3,3,0,2,0,3,3,0,2,3,2,3,2,0,2,2,0,0,0,2,2,3,3,2,2,0,2,3,0,0,0,0,0,0,0,0,0,0,2,3,2,0,3,3,3,0,2,0,2,3,2,0,3,3,2,0,2,0,3,2,0,3,0,0,2,2,0,3,0,2,3,3,3,0,2,0,0,3,0,2,3,2,2,0,3,3,3,3,3,0,3,0,0,0,0,3,2,0,0,2,3,3,2,2,0,3,2,0,3,0,2,3,3,0,2,2,3,2,2,2,3,2,0,0,3,2,0,0,0,2,0,2,0,0,2,2,3,0,3,0,0,3,0,0,0,3,0,0,2,2,0,2,2,3,3,3,3,0,0,2,2,2,0,3,2,2,2,2,2,0,3,0,0,3,2,0,0,3,2,3,3,0,3,0,3,0,3,2,2,2,0,0,3,2,2,0,0,0,2,3,2,0,2,3,3,3,0,3,3,0,2,0,0,2,3,3,0,3,2,2,2,2,2,3,3,2,2,3,3,2,3,0,3,3,0,3,2,2,0,2,0,3,0,3,0,2,3,0,2,3,2,0,2,0,3,0,2,3,3,2,0,3,3,3,2,2,3,3,2,2,2,0,3,2,2,0}, - {271,271,329,343,387,426,426,601}, - {426,601,426,387,343,271,329,271}, - {3.70991,4.43491,3.76334,9.43944,9.43944,3.70991,3.76334,4.43491}} -}; + {-6.59634, -7.13901, -6.13753, -6.58082, 5.19821, 2.04918, -2.96856, 8.16444, + -2.76879, 7.51114, -6.82261, -6.61152, 5.02008, 2.58376, 5.55621, 2.31966, + 4.86379, 3.33731, 5.84639, 1.15623, -2.17159, 8.60241, -4.97844, -6.94077, + -2.31014, 8.41407, 5.5582, 0.402669, 5.25265, 0.919754, 5.85298, 2.11489, + -3.29245, 8.69222, -1.9621, 8.81209, -1.53408, 8.86723, -2.18227, 8.79519, + 4.60519, 2.20738, -6.4759, -6.9043, -7.18766, -6.10045, -9.00148, -7.48793, + 4.01674, 1.41769, -2.45347, 10.1085, -3.20892, 9.22827, -3.18612, 9.62596, + 4.81977, 3.36517, 4.90693, 2.8628, -6.44269, -5.68946, -8.30144, -5.37878, + 4.61485, 2.79094, -1.98726, 9.31127, -3.66019, 9.38998, -6.58607, -8.23669, + -7.46015, -6.29153, 4.08468, 3.85433, -6.36842, -5.50645, -6.83602, -5.18506, + -0.627173, 10.3597, 3.98846, 1.48928, -2.9968, 8.58173, -7.2144, -7.28376, + -0.660242, 10.1409, -4.23528, -8.38308, -3.15984, 8.52716, -2.40987, 9.76567, + -8.7548, -6.76508, 4.56971, 0.312209, -7.5487, -5.8402, -1.6096, 9.32159, + 5.04813, 0.270586, -7.6525, -6.47306, -1.79758, 7.88964, -9.0153, -3.74236, + -3.5715, 9.48788, -1.65154, 8.85435, -3.47412, 9.70034, 6.31245, 2.39219, + 4.03851, 2.29295, -3.17098, 9.86672, -6.90693, -7.81338, -6.22373, -6.68537, + -3.22204, 9.12072, -0.365254, 9.6482, -7.76712, -7.31757, 4.15669, 3.54716, + 4.1937, 0.083629, -3.03896, 9.52755, -6.29293, -7.35501, -2.95926, 9.63714, + 4.02709, 1.58547, 4.56828, 1.93595, 5.6242, 1.75918, -7.36237, -7.83344, + 5.32177, 3.81988, -2.43183, 8.153, -1.97939, 10.4559, -3.49492, 9.51833, + 3.39602, 1.28026, -2.42215, 8.71528, -3.57682, 8.87191, -2.77385, 11.7345, + 5.71351, 0.946654, -6.50253, -6.90937, 4.08239, 0.603367, -5.64134, -6.85884, + -2.76177, 7.7665, -2.25165, 8.93984, -3.49071, 9.47639, -1.06792, 7.57842, + 5.15754, 1.24743, 3.63574, 1.20537, -6.07969, -8.49642, 4.12227, 2.19696, + -7.17144, -8.4433, -1.92234, 11.2047, 3.23237, 1.19535, 3.85389, 0.641937, + 4.82665, 1.21779, -7.68923, -6.45605, -7.00816, -8.76196, -5.12894, 9.83619, + -5.66247, -5.35879, 3.05598, 2.73358, 6.06038, 1.40242, -1.69568, 7.78342, + 5.13391, 2.23384, -2.96984, 10.0714, -5.36618, -6.2493, 5.55896, 1.6829, + 3.55882, 2.58911, 5.36155, 0.844118, -0.0634456, 9.14351, 4.88368, 1.40909, + -7.04675, -6.59753, -7.78333, -6.55575, 5.39881, 2.25436, -2.85189, 8.64285, + -2.22821, 8.39159, 3.88591, 1.69249, -7.55481, -7.02463, 4.60032, 2.65467, + -6.90615, -7.76198, -6.76005, -7.85318, 4.15044, 3.01733, -7.18884, -7.63227, + 4.68874, 2.01376, 3.51716, 2.35558, -3.81367, 9.68396, 4.42644, 3.4639, + 4.81758, 0.637825, -6.20705, -4.98023, -1.68603, 9.0876, -4.99504, -5.33687, + -1.77073, 9.18565, 4.86433, 3.02027, 4.20538, 1.664, 4.59042, 2.64799, + -3.09856, 9.86389, -3.02306, 7.95507, -6.32402, -6.79053, -7.67205, -7.18807, + -8.10918, -6.38341, -1.67979, 6.80315, 4.00249, 3.16219, -2.54391, 7.84561, + -3.22764, 8.80084, -2.63712, 8.05875, -2.41744, 7.02672, -6.71117, -5.56251, + 5.18348, 1.60256, -7.40824, -6.29375, -4.22233, 10.3682, 4.8509, 1.87646, + -2.99456, 9.09616, 5.1332, 2.15801, -2.27358, 9.78515, -6.73874, -8.64855, + 4.96124, 2.39509, -3.70949, 8.67978, -4.13674, 9.06237, 2.80367, 2.48116, + -0.876786, 7.58414, -3.7005, 9.67084, 6.48652, 0.903085, 6.28189, 2.98299, + -6.07922, -6.12582, -5.67921, -7.537, 4.55014, 3.41329, -1.63688, 9.19763, + -4.02439, 10.3812, 5.23053, 3.08187, -2.2951, 7.76855, -6.24491, -5.77041, + 6.02415, 2.53708, -6.91286, -7.08823, 4.83193, 1.66405, -7.07454, -5.74634, + -2.09576, 10.8911, 3.29543, 1.05452, -3.49973, 8.44799, 5.2922, 0.396778, + -2.54502, 10.5789, -6.38865, -6.14523, -1.75221, 8.09212, -9.30387, -5.99606, + -2.98113, 10.1032, -6.2017, -7.36802, 4.63628, 0.814805, -1.81905, 8.61307, + 4.88926, 3.55062, 3.08325, 2.57918, -2.51717, 10.4942, -5.75358, -6.9315, + 6.36742, 2.40949, 5.74806, 0.933264, 4.74408, 1.91058, -7.41496, -6.97064, + -2.98414, 8.36096, 6.72825, 1.83358, -2.95349, 9.39159, -3.35599, 7.49944, + 6.18738, 3.76905, -3.17182, 9.58488, 5.17863, 1.0525, -3.0397, 8.43847, + -2.23874, 8.96405, 3.04689, 2.41364, 6.14064, 2.82339, -6.33334, -6.87369, + -7.92444, -8.84647, 3.65129, 0.86958, 5.29842, 3.98337, -2.06538, 9.78892, + -6.89494, -6.30082, -2.52144, 8.11703, -8.11398, -7.47257, 5.3381, 2.36666, + -6.93452, -6.59456, -7.50634, -6.01772, 6.23438, 1.12621, -2.15218, 8.32138, + -7.04777, -7.3522, -2.52771, 8.72563, -2.77907, 8.03552, 4.29123, 1.62391, + -8.07551, -6.43551, -3.28202, 8.77747, -2.21308, 9.27534, -8.25153, -8.49367, + -3.54644, 8.82395, -8.05867, -5.69243, 4.46681, 1.98875, 3.8362, 3.61229, + -6.96231, -7.00186, 5.18993, 1.00483, -5.35116, -6.37227, 5.23298, 1.66362, + -5.68306, -7.03864, -9.03144, -7.59926, -6.10127, -7.4313, 4.83572, 0.994797, + -7.32695, -5.59909, 0.569683, 10.1339, 3.35957, 2.84563, -2.4122, 9.60944, + 5.00855, 1.57983, -2.57528, 7.80327, 3.96349, 3.77411, 4.59429, 2.21651, + -6.54765, -6.68961, 4.76798, 1.29212, -1.67351, 7.88458, 5.63615, 1.47941, + -2.5301, 9.13161, 4.26075, 1.76959, 4.67788, 2.0932, 4.39955, 1.59835, + 3.91274, 1.72565, -4.1786, 9.55765, -7.34566, -8.47481, 4.8364, 2.68217, + -7.36848, -7.99973, -5.84708, -5.7534, 5.37252, 1.89245, -2.1707, 8.599, + -1.3299, 9.0818, -6.79122, -5.40258, 5.56391, 1.78827, -0.194539, 7.14702, + 4.60489, 3.74397, 5.50995, 2.46885, -3.98772, 8.29444, -5.21837, -7.33721, + -1.63959, 10.3699, -5.92932, -5.1695, -5.88358, -7.6369, 4.11716, 3.02218, + -6.54114, -7.17551, 3.97179, 2.96521, -6.75325, -4.94118, 5.26169, 0.402945, + 3.25031, 0.327771, -0.44845, 10.7696, -2.15141, 9.57507, 7.04329, 1.91555, + -3.74615, 7.69383, -7.52318, -5.85015, -6.80419, -8.48208, -4.57664, 8.92517, + 4.57574, 2.30193, 4.84098, 3.02382, -9.43355, -5.94579, -3.52203, 9.32853, + 3.43018, 2.5731, -6.15725, -7.25294, -6.69861, -8.17694, -2.40955, 8.51081, + -4.82342, -7.98332, -7.10611, -6.51274, 5.86755, 0.763529, -6.56045, -5.53966, + -3.61553, 7.81808, 4.3825, 0.304586, -6.52818, -5.80996, 4.59972, 0.542395, + -6.90603, -6.59995, -6.3585, -6.23489, -6.01915, -7.46319, -5.38694, -7.15123, + -7.83475, -6.45651, 5.89564, 1.07856, -5.15266, -7.27975, -6.97978, -7.08378, + 5.83493, 0.449983, -2.62374, 10.2521, -7.34494, -6.98606, -6.79719, -8.33766, + 3.54757, 1.65676, -8.40528, -5.61753, -5.85556, -6.28758, 4.66862, 3.25162, + -6.26047, -4.82261, 4.61552, 4.11544, -1.36637, 9.76622, 4.2517, 2.14359, + -2.45099, 7.87132, -0.376164, 7.0622, 4.34493, 3.22091, 6.95921, 2.36649, + -6.70319, -7.24714, -5.56932, -5.48443, -7.43149, -4.32191, -3.23956, 9.23074, + -5.77255, -7.00049, 4.96601, 0.722056, -7.88617, -5.74023, 4.18757, -0.45071, + -7.12569, -7.72336, 5.27366, 2.38697, 3.93487, 1.9174, 3.19186, -0.225636, + -3.41722, 7.60198, -3.08286, 8.46743, -5.87905, -7.55073, -5.26425, -7.20243, + -2.97867, 9.55685, -1.23153, 8.42272, -2.33602, 9.3996, -3.33819, 8.45411, + -3.58009, 9.49676, 3.78152, 2.67348, -1.54582, 9.42707, -4.04331, 10.292, + 3.3452, 3.134, -2.75494, 8.74156, -3.26555, 7.59203, -7.27139, -7.80252, + 3.5293, 3.72544, 6.11642, 3.35326, 4.01611, 3.8872, 4.89591, 2.95586, + -7.06677, -5.89438, 4.19438, 3.42655, -6.11355, -5.65318, -7.59645, -8.74665, + -5.80362, -6.8588, 3.80453, 4.11832, 5.70655, 3.14247, -4.98084, 8.21739, + -1.87642, 11.285, 4.39864, 2.32523, -3.48388, 9.80137, 4.02836, 0.566509, + -2.41212, 9.98293, -5.40846, -7.08943, 4.01506, 1.99926, -3.43613, 8.95476, + -7.24458, -7.71932, 6.02204, 2.62188, -6.29999, -6.55431, 6.19038, 0.974816, + 3.55882, 3.02632, -7.06011, -3.687, -1.55877, 8.43738, -5.14711, -4.64881, + 4.7167, 0.690177, -7.90381, -5.02602, 4.17218, 2.31967, -0.643423, 9.48812, + -7.95237, -6.64086, -4.05986, 9.08285, -6.24158, -6.37927, -6.6105, -7.2233, + -6.21675, -5.70664, -3.29967, 9.48575, 3.41775, 2.68617, -2.24948, 8.10997, + -2.24931, 9.79611, -9.0523, -6.03269, -2.2587, 9.36073, 5.20965, 2.42088, + -3.10159, 8.1503, -6.67906, -5.73147, 4.0687, 2.54575, -1.24229, 8.30662, + -2.09627, 8.45056, -7.87801, -6.57832, 4.72216, 3.03865, -0.929985, 9.78172, + -8.56307, -7.68598, -7.05257, -5.1684, -7.09076, -7.86729, 4.61432, 3.1459, + -6.34133, -5.8076, -3.82943, 10.8457, -8.46082, -5.98507, 5.34763, 1.4107, + -1.68714, 10.9111, -1.67886, 8.1582, -0.623012, 9.18886, -4.21258, 8.95874, + -2.16744, 10.8905, -6.57158, -7.27176, 2.14047, 4.26411, -8.44217, -7.40916, + 5.29008, 1.87399, 4.31824, 4.04992, -3.77008, 9.93215, -2.72688, 10.1131, + -6.14278, -7.16144, -3.92457, 8.59364, -5.92649, -6.59299, 4.68369, 1.82617, + -6.89905, -7.18329, 3.95173, 4.22561, -7.66453, -6.23183, -2.44167, 7.58954, + -6.36603, -7.41281, -6.45081, -6.187, -6.6125, -6.37138, 5.46036, 2.48044, + -2.14756, 8.36917, -2.3889, 9.52872, 3.80752, 2.44459, -3.98778, 10.158, + -6.63887, -4.27843, -8.65266, -5.61819, -7.97003, -5.46918, -5.9604, -7.54825, + -0.916011, 8.50307, -3.69246, 6.97505, -7.98533, -7.09503, -2.30033, 7.05462, + 4.76218, 2.51647, -7.04981, -7.33334, 3.66401, 3.02681, -2.50408, 8.7797, + 7.19996, 1.87711, 4.01291, 3.78562, -0.356015, 8.24694, -0.958046, 9.12996, + 4.60675, 3.76773, 6.21945, 1.45031, 4.27744, 0.8535, -4.72232, -7.48582, + 6.03923, 2.8978, -3.26833, 9.16468, -7.97059, -7.29092, -2.3998, 9.74005, + -2.66721, 8.58741, -7.36269, -6.73332, -7.87893, -7.38488, 4.65023, 0.661333, + -4.8171, -7.94764, -4.11564, 9.21775, 4.80633, 2.46562, -2.72887, 9.3714, + -5.26735, -5.5652, 4.9826, 2.42992, -6.17018, -7.3156, 4.38084, 1.77682, + 5.35084, 2.41743, -2.61796, 9.416, 5.27229, 2.94572, -7.52315, -5.95227, + -1.45077, 7.25555, -3.79916, 7.71921, -2.23251, 9.84147, 3.70054, 1.82908, + -1.93831, 10.1499, -6.18324, -5.9248, -3.33142, 9.25797, -6.08536, -8.1344, + 5.95727, 2.17077, 4.87366, 0.417274, -6.529, -6.39092, -9.24256, -7.88984, + -6.36652, -7.13966, -3.90777, 9.57726, -7.06252, -5.50523, -2.26423, 8.50734, + -2.84498, 10.6833, 5.0391, 2.62037, -2.74815, 8.10672, 3.35945, 3.72796, + -4.11668, 9.19892, 5.66903, 2.44577, -1.63807, 8.68826, -7.42587, -6.48831, + 6.17063, 3.19193, -2.28511, 9.02688, -7.10088, -7.15692, 4.46293, 1.17487, + -5.91017, -6.45292, -2.26724, 7.10101, -2.43339, 8.33712, -4.63309, 8.48853, + -3.31769, 8.51253, -2.49078, 10.6907, -1.30798, 8.60621, 6.30535, 2.98754, + -5.79384, -6.78213, -1.93213, 8.81124, 4.55773, 3.09047, 6.37584, 2.17108, + 4.3927, 1.29119, -3.2245, 9.69388, -1.69634, 9.64392, 2.799, 0.693593, + -2.1426, 8.07441, -8.4505, -8.00688, 4.736, 1.51089, -2.5863, 9.35544, + -2.94924, 9.14503, 6.2054, 1.90742, 5.67172, 0.487609, -5.69071, -6.17181, + -8.24651, -7.10488, -7.34424, -6.67895, -6.71977, -7.90778, -1.82294, 7.40157, + -9.40991, -7.16611, -4.37999, 8.66277, -1.42615, 10.0681, -2.00828, 8.03673, + -7.50228, -6.6855, -5.65859, -6.29801, -8.02335, -6.77155, -3.40761, 9.50621, + -2.82447, 9.77326, -1.5938, 9.34304, -3.5213, 7.35943, -3.36961, 8.62973, + -7.01708, -5.92724, 5.20886, 3.60157, -1.71817, 8.1049, -2.46363, 8.36269, + -2.77809, 7.90776, -2.75459, 8.26055, -2.03596, 8.94146, -4.53434, 9.20074, + -7.44387, -6.69556, -6.90099, -7.62732, 3.29169, 2.71643, 6.08686, 2.16972, + -2.31111, 8.86993, -5.75046, 7.9899, 4.69951, 1.32623, 4.71851, -0.025031, + -6.42374, -4.71511, -8.04974, -8.68209, -3.16103, 9.06168, -6.18267, -7.21393, + -7.94202, -6.4518, -7.07697, -7.03138, 3.93554, 0.564708, -1.20372, 9.03529, + -7.10611, -7.83955, -7.47529, -5.50567, -6.15453, -6.36393, -2.98024, 9.24634, + -7.75761, -7.70699, -3.08597, 9.76968, -8.04954, -9.75237, 5.2534, 0.950377, + 5.63789, -0.923086, -5.7065, -6.51047, -8.02132, -7.07377, -8.28594, -6.96322, + -7.70722, -6.79397, -2.4962, 10.4678, 5.02846, 4.46617, 4.02648, 1.6707, + -0.319395, 8.20599, 4.74525, 0.639144, -1.0313, 8.49602, 4.08766, 2.6061, + 3.63826, 1.69207, 2.55795, 3.66963, 5.2826, 3.30232, -1.04355, 8.78851, + -6.84762, -7.63353, -4.70868, -7.056, 3.53651, -0.179721, -3.38482, 7.63149, + -5.9265, -6.36702, -0.986074, 9.5532, -2.42261, 8.85861, -7.42835, -6.78726, + -4.02857, 8.53005, -8.22675, -7.85172, -5.57529, -8.5426, 6.03009, 2.53098, + -7.10448, -7.53011, -3.4988, 8.8885, -2.62485, 8.71318, -6.39489, -7.72647, + 3.93789, 1.31027, 4.27627, 1.91622, -0.923181, 7.77647, -5.16017, 10.1058, + -6.44307, -5.97617, -7.24495, -6.69543, 6.27331, 0.826824, -6.55655, -7.13246, + 5.66245, 4.41292, -2.13805, 8.4103, 5.23463, 2.82659, -4.86624, -6.74357, + -6.14082, -6.26474, -2.67048, 9.41834, -1.26311, 6.9409, -7.20231, -7.13094, + -1.35109, 9.80595, 3.9906, 0.749229, -6.75696, -5.25543, 4.84826, -0.0685652, + -7.4914, -6.91715, 4.46725, 2.85683, -2.95571, 9.87068, 6.32381, 1.51429, + -6.81177, -6.02734, -2.57188, 9.96943, -4.28792, 10.5103, 3.65025, 2.91394, + -7.11856, -7.24693, -6.98693, -6.43239, 4.7651, 1.54376, 4.00092, 0.65008, + -7.14816, -7.7713, -7.58803, -8.39382, 4.3321, 2.19232, -7.89545, -6.81843, + -2.11475, 8.5933, -0.743743, 9.41927, 3.64849, -0.18022, -1.68665, 7.79344, + 4.00214, 1.44217, -6.96799, -7.25012, -1.58302, 10.9237, -6.68524, -7.23328, + 4.65831, 2.32075, 4.62024, 2.52566, -4.23412, 8.452, -0.822056, 9.89593, + -7.19868, -7.67614, -3.32742, 11.1067, 5.27861, 0.830165, 4.48982, 2.09875, + -6.58087, -7.6319, -0.880582, 7.63418, -7.01088, -6.80326, -7.31601, -6.98972, + -6.85883, -7.60811, 6.14328, 2.85053, -7.49206, -6.51861, -2.28174, 10.3214, + 4.81074, 1.78919, -5.58987, -6.20693, 4.08096, 2.35038, -1.5029, 8.43739, + 4.11536, 2.46254, -3.28299, 7.76963, 4.31953, 2.39734, 4.91146, 0.696421, + -1.4782, 9.94557, -3.34842, 8.70507, -6.97822, -6.86126, 4.10012, 1.19486, + -2.50395, 9.06127, 4.41891, 2.00006, -2.73266, 9.72829, 3.5436, 0.533119, + 5.78864, 0.233456, -6.62589, -6.41242, -2.21942, 11.0897, -6.76636, -8.31839, + -2.71732, 8.52129, -5.20972, -6.48544, 3.26056, 1.24224, 3.45228, 2.28299, + 4.72171, 1.87428, -7.52585, -5.1048, 5.0695, 2.18086, -6.55646, -7.02771, + 3.23727, 3.72275, 3.41411, 0.508795, -7.80698, -6.64174, -5.90443, -6.37902, + -0.387041, 10.0468, -1.3506, 8.1936, -6.08614, -8.62864, -5.91478, -5.26453, + -2.61623, 7.97904, 4.45459, 1.84335, -6.66643, -7.63208, 3.6729, 1.92546, + -1.32976, 8.54511, 6.31758, 1.41958, 4.63381, 2.81166, -7.01394, -6.0693, + -2.7786, 9.73183, -2.90131, 7.55077, -7.13842, -5.28146, 6.71514, 1.28398, + -6.98408, -7.04893, -3.03946, 8.22141, -2.76417, 10.5183, -7.35347, -6.89456, + 4.19345, 2.16726, -2.02819, 9.23817, 4.97076, 2.8067, -0.544473, 9.04955, + 4.90727, 2.29487, -6.31871, -7.17559, 3.71665, 0.621485, 4.7903, 2.33813, + -6.47994, -7.53147, -6.80958, -5.71823, -8.07326, -5.96096, 4.77342, 1.8207, + 5.71856, 1.93466, -2.70156, 9.31583, -2.1478, 10.5523, 4.78855, 1.63608, + 5.53507, 2.60834, -7.00058, -6.46058, 5.4738, 2.43235, -1.34603, 9.02452, + -7.5337, -8.71074, -7.30893, -7.57253, -5.33752, -4.87402, -7.01364, -6.86542, + -7.93331, -7.94791, -5.69392, -6.16116, -7.32291, -7.76491, -6.41965, -7.55783, + -7.87996, -7.55785, -6.69005, -5.87906, 3.92147, 2.86809, -1.5552, 9.66568, + 5.07989, 1.47112, -7.48524, -5.0541, -1.82724, 8.70402, -2.00421, 9.88004, + -2.62153, 8.79332, -7.52111, -6.44819, 4.06424, 2.09518, -6.65494, -5.94752, + 6.93878, 1.61033, -3.95728, 7.60682, 5.67016, 2.21196, -7.81507, -5.79413, + -2.41152, 8.24128, -3.83738, 9.21115, 4.5516, 4.55288, -5.75551, -5.93258, + 4.56545, 2.59384, -7.45614, -9.47115, -2.39568, 9.67642, 5.57816, 1.45712, + -7.48184, -6.41134, -1.99415, 12.867, -8.35854, -6.69675, -7.52559, -7.6793, + 5.7454, 3.1602, 2.94692, 1.87483, -8.77324, -6.66682, -3.21125, 8.68662, + -6.25806, -7.24972, 5.17639, 1.0747, -2.44897, 11.4775, -3.30172, 8.89955, + -2.85191, 8.21201, -8.85893, -6.1322, 4.08957, 1.30155, -5.88132, -7.31173, + -7.10309, -7.22943, -2.46068, 8.18334, -7.01226, -7.85464, 4.75411, 2.12347, + -3.42862, 10.5642, 7.16681, 1.4423, 5.42568, 2.39863, -6.00833, -8.22609, + -1.7619, 9.62466, -2.49527, 8.99016, -2.98837, 8.82863, -2.97262, 8.54856, + -1.34142, 9.26871, -5.99652, -6.95795, -1.87061, 7.35277, -8.68277, -8.46425, + -7.01808, -8.10441, -7.04269, -7.62501, -7.69783, -6.88348, -2.19829, 10.4896, + 4.67396, 1.2032, -5.58263, -6.90298, -5.69224, -4.29055, 4.77285, 1.27305, + -3.33469, 8.6929, -2.54195, 8.47086, 4.46492, 1.21742, 5.41158, -0.875373, + -8.68069, -7.42278, -3.88687, 8.07646, 4.6682, 2.00293, -8.29799, -8.64092, + -1.86382, 10.3829, -6.51234, -5.04193, 4.54458, 2.25219, -1.93264, 9.32554, + -3.06285, 7.81641, -6.90714, -5.10786, 4.69653, 2.50286, 6.43757, 2.61401, + -1.85483, 8.9587, 4.60224, 3.07647, 4.4492, 2.1906, 5.02181, 2.40321, + -2.22923, 7.8888, 5.68943, 1.43793, -6.71097, -6.43817, -5.00633, -5.80006, + -2.43763, 8.53663, 5.72577, 2.44787, -6.57079, -5.17789, -5.77867, -4.92176, + -6.57222, -6.06437, 3.96639, 2.25216, -7.95177, -9.80146, 4.92574, 2.30763, + -7.6221, -8.20013, -6.4132, -6.91575, 4.01432, 2.36897, 3.0833, 1.54505, + -1.99416, 9.52807, -7.85128, -8.25973, -0.86423, 8.76525, -6.31412, -8.64087, + -8.07355, -6.73717, -2.52821, 8.01176, -5.82357, -6.65687, -7.08865, -7.73063, + -5.56251, -6.99818, -2.12513, 8.98159, -6.89834, -7.26863, -7.92654, -6.34346, + 4.86201, 1.49442, 4.92905, 4.42847, -5.57789, -5.3186, 4.34232, 3.34888, + 2.64614, 2.34723, -4.10363, 8.41491, -2.18648, 8.18706, -3.39871, 8.19848, + -2.66098, 9.6026, -6.95927, -6.42774, -5.61392, -7.74628, 5.60376, 4.18369, + 5.28536, 4.13642, 4.8428, 0.457426, -6.33816, -6.12095, -2.4394, 8.62897, + 4.56938, 2.45967, 4.0582, 0.958413, 5.62164, 1.64834, 5.73119, 2.58231, + 4.66806, 1.96405, -6.71905, -6.87706, -2.18503, 8.88414, -6.03901, -6.33338, + -8.38435, -6.12005, 0.0641622, 9.0735, 5.19967, 3.05395, -5.48716, -7.13016, + -6.85541, -5.46789, -1.88353, 8.15713, 4.27891, 3.1325, -2.75816, 9.98586, + -2.03022, 9.34795, -7.66741, -7.50096, -3.39305, 9.16801, -8.49476, -5.71537, + -1.68378, 9.8278, -7.41559, -6.07205, -3.15577, 7.93274, 5.22381, 1.61388, + 3.65739, 1.74854, 4.94251, 1.21889, -7.12832, -5.27276, -9.58286, -6.20223, + -2.21613, 8.29993, 5.34799, 2.92987, 4.09496, 2.37231, -7.25183, -5.79136, + -6.46981, -7.12137, -6.28607, -9.8205, 4.52865, 1.06926, -3.10984, 8.72259, + 3.61865, 2.68153, -5.96604, -7.68329, 3.11435, 1.28126, -1.1064, 7.61243, + -2.17688, 8.2658, -3.27246, 7.2094, -5.55143, -6.32388, -1.69667, 10.3705, + -2.16558, 7.25125, -6.36572, -6.70053, 4.12259, 3.38252, -4.80554, -7.79949, + -5.23966, -6.13798, 4.21969, 1.69139, -1.98985, 10.547, -2.52269, 7.95658, + -6.75642, -6.32862, -3.51521, 7.8001, 4.70435, -0.00229688, 6.25359, 2.4267, + 5.82935, 0.745562, 5.24778, 2.15978, 5.48052, 1.32055, -3.05358, 9.12521, + -3.18922, 9.24654, 4.47276, 2.11988, 5.36751, 2.02512, -2.18511, 8.6292, + -2.48469, 9.51228, 5.57556, 3.24472, -2.58121, 10.0178, -6.12629, -6.49895, + -4.54732, 8.0062, -4.20166, 10.5438, -7.61422, -7.69036, -4.42797, 8.98777, + 4.45301, 1.53344, 4.59296, 2.45021, -6.81264, -6.36417, 4.62346, 3.16156, + -5.93007, -8.36501, -2.78425, 6.71237, -6.17141, -6.64689, -5.20608, 8.95999, + -7.30598, -5.73166, 4.39572, 2.93726, -1.89503, 9.77179, -5.683, -7.48989, + 4.80924, 0.559455, -2.17793, 9.98983, 5.23728, 2.67434, -7.03976, -6.20877, + 3.90435, 3.20926, -7.78536, -7.53388, -1.00684, 9.08838, -5.26741, -5.98327, + 3.28002, 2.71942, -1.47166, 8.50427, -2.32733, 9.26251, 5.16271, 1.39947, + -6.59093, -6.61979, -2.44492, 7.93654, -1.05805, 9.97356, -3.1109, 10.8666, + 3.38834, 3.41693, 4.83098, 2.01961, -2.74013, 9.71049, -3.34892, 8.41489, + 4.94768, 0.263001, 3.57477, 1.66795, 5.78915, 1.26999, -4.81812, -5.67174, + -1.88508, 9.64263, 3.69048, 4.60555, 4.03037, 1.7862, -7.4418, -7.08933}, + {0.127717, 0.211407, 0.195547, 0.21633, 0.39671, 0.229008, 0.20839, 0.169236, 0.314314, + 0.322473, 0.169506, 0.45499, 0.147819, 0.296502, 0.15198, 0.356444, 0.0992833, 0.220833, + 0.296206, 0.178067, 0.135359, 0.189725, 0.243099, 0.519986, 0.168105, 0.273465, 0.126033, + 0.18045, 0.282832, 0.193901, 0.213704, 0.425046, 0.203191, 0.228674, 0.209267, 0.355039, + 0.212918, 0.315495, 0.294112, 0.257576, 0.5786, 0.186019, 0.171919, 0.171919, 0.449151, + 1.34947, 0.171919, 0.16341, 0.641387, 0.342115, 0.267343, 0.246125, 0.277612, 0.181462, + 0.22944, 1.95598, 0.164897, 0.235803, 0.228273, 0.314629, 0.127403, 0.241241, 0.189362, + 0.151691, 0.130085, 0.526707, 0.217069, 0.282306, 0.531523, 0.177035, 0.169776, 0.20395, + 0.177165, 0.146628, 0.280013, 0.223033, 0.50947, 0.184133, 0.295329, 0.183219, 0.28166, + 0.179348, 0.276462, 1.00283, 0.248147, 0.214453, 0.231732, 0.170672, 0.256893, 0.133271, + 0.151137, 0.500823, 0.23678, 0.376983, 0.362061, 0.140013, 0.388863, 0.398552, 0.38015, + 0.190081, 0.167115, 0.206884, 0.473849, 1.05117, 0.435665, 0.323618, 0.326201, 0.32226, + 0.201787, 0.246496, 0.28325, 0.226596, 0.238153, 0.277268, 0.674629, 0.179433, 0.175651, + 0.154778, 0.178195, 0.192796, 0.103571, 0.227621, 0.201124, 0.160525, 0.160964, 0.240099, + 0.258027, 0.134127, 0.127717, 0.341378, 0.311595, 0.282306, 0.168988, 0.40775, 0.246125, + 0.583131, 0.236804, 0.238633, 0.194824, 0.169315, 0.244227, 0.249511, 0.189725, 0.305662, + 0.301415, 0.658641, 0.250944, 0.151792, 0.141383, 0.143843, 0.563347, 0.184216, 0.204155, + 0.221764, 0.314908, 0.144518, 0.228808, 0.255785, 0.163457, 0.424705, 0.170202, 0.312598, + 0.300629, 0.532614, 0.661392, 0.228273, 0.543432, 0.257175, 0.258994, 0.281413, 0.273897, + 0.246837, 0.293489, 0.25533, 0.260492, 0.213704, 0.3091, 0.17103, 0.172285, 0.241399, + 0.35999, 0.372243, 0.269191, 0.390239, 0.31761, 0.200593, 0.22197, 0.752914, 0.266571, + 0.13102, 0.268659, 0.293723, 0.356294, 0.296258, 0.264531, 0.15468, 0.358535, 0.243711, + 0.112147, 0.121659, 0.197101, 0.515292, 0.245628, 0.279863, 0.789807, 0.195156, 0.196073, + 0.149564, 0.118675, 0.389373, 0.233821, 0.176128, 0.481088, 0.360027, 0.553152, 0.208207, + 0.171608, 0.160489, 0.334298, 0.139426, 0.168603, 0.266199, 0.326458, 0.103571, 0.171208, + 0.130961, 0.190887, 0.177229, 0.241651, 0.115152, 0.196753, 0.481088, 0.230965, 0.354631, + 0.14591, 0.328543, 0.141544, 0.195888, 0.290379, 0.245954, 0.184547, 0.575214, 0.186929, + 0.28527, 0.292213, 1.20033, 0.281528, 0.15625, 0.211524, 0.186398, 0.298061, 0.147393, + 0.245349, 0.164527, 0.224771, 0.222382, 0.251643, 0.148835, 0.135359, 0.204967, 0.193024, + 0.486309, 0.389686, 0.211921, 0.307405, 0.38666, 0.26802, 0.16605, 0.323134, 0.268397, + 0.217894, 0.974118, 0.371618, 0.156201, 0.305787, 0.339305, 0.371032, 0.381765, 0.22747, + 0.24906, 0.100884, 0.253192, 0.314253, 0.388289, 0.580947, 1.00267, 0.241998, 0.489101, + 0.341501, 0.247423, 0.328311, 0.440281, 0.14927, 0.244469, 0.846828, 0.191725, 0.217429, + 0.123403, 0.322875, 0.145373, 0.757259, 0.190086, 0.316286, 0.268397, 0.296721, 0.440472, + 0.186848, 0.232134, 0.180239, 0.219724, 0.205886, 0.250975, 0.145636, 0.312476, 0.366418, + 0.128135, 0.315235, 0.264531, 0.161815, 0.31631, 0.296489, 0.37171, 0.197217, 0.195625, + 0.479579, 0.443037, 0.323347, 0.193616, 0.160251, 0.8952, 0.256291, 0.593345, 0.177165, + 0.409514, 0.847863, 0.111448, 0.210031, 0.251347, 0.351953, 0.705204, 0.117901, 0.182343, + 0.230179, 0.83632, 0.22104, 0.145163, 0.200326, 0.23431, 0.21868, 0.253575, 0.186562, + 0.192757, 0.172716, 0.27396, 0.258581, 0.327892, 0.376138, 0.223477, 0.302375, 0.145845, + 0.436902, 0.421794, 0.328543, 0.19246, 0.238889, 0.254866, 0.284674, 0.457849, 0.202937, + 0.392568, 0.453083, 0.782713, 0.465401, 0.178623, 0.304863, 0.190081, 0.228641, 0.255135, + 0.245037, 0.217526, 0.109584, 0.276462, 0.182301, 0.38582, 0.349942, 1.3889, 0.30235, + 0.796353, 0.160168, 0.643204, 0.153752, 0.410268, 0.186439, 0.256834, 0.185783, 0.0957629, + 0.226596, 0.197951, 0.17123, 0.192836, 0.18405, 0.575784, 0.228874, 0.201787, 0.241209, + 0.217386, 0.195751, 0.291585, 0.144531, 0.14176, 0.157635, 0.410268, 0.476338, 0.308148, + 0.148077, 0.152093, 0.196791, 0.568087, 0.414026, 0.250587, 0.473463, 0.293645, 0.396768, + 0.2766, 0.38664, 0.135034, 1.50827, 0.472527, 0.268418, 0.40383, 0.375914, 0.246496, + 0.176474, 0.340405, 0.220833, 0.138782, 0.159009, 0.444219, 0.259582, 0.33638, 0.195586, + 0.210974, 0.200288, 0.148129, 0.0974216, 0.211588, 0.280081, 0.44113, 0.773921, 0.553848, + 0.448079, 0.183136, 0.380854, 0.685021, 0.308767, 0.553276, 0.181578, 0.164759, 0.313889, + 0.137886, 0.545387, 0.278449, 0.736895, 0.360054, 0.358929, 0.457315, 0.343278, 0.507662, + 0.280829, 0.113886, 0.23146, 0.160584, 0.192796, 0.147561, 0.241272, 0.168988, 0.730511, + 0.27836, 0.179847, 0.22555, 0.418069, 0.158348, 0.128965, 0.179454, 0.126366, 0.164434, + 0.273633, 0.309556, 0.500823, 0.367852, 0.192875, 0.230262, 0.32724, 0.249969, 0.142618, + 0.494229, 0.36108, 0.227931, 0.23113, 0.742825, 0.190126, 0.33741, 0.280598, 0.145268, + 0.378423, 0.211921, 0.183594, 0.59201, 0.279563, 0.195683, 0.248101, 0.199754, 0.342494, + 0.174343, 0.14149, 0.28085, 0.175781, 0.518738, 0.17223, 0.489904, 0.181167, 0.354286, + 0.297824, 0.280829, 0.219412, 0.22814, 0.195625, 0.313949, 0.294708, 0.211551, 0.236255, + 0.666933, 0.204808, 0.52591, 0.180725, 0.186889, 0.246589, 0.410575, 0.338348, 0.206219, + 0.361766, 0.158143, 0.280816, 0.4149, 0.773082, 0.340046, 0.369672, 0.256923, 0.167195, + 0.197217, 0.252339, 0.172716, 0.191526, 0.263085, 0.345698, 0.168286, 0.243099, 0.434631, + 0.22944, 0.161862, 0.206589, 0.23457, 0.181924, 0.419063, 0.183427, 0.186152, 0.236352, + 0.306336, 0.149002, 1.50086, 0.188231, 0.442757, 0.485602, 0.466662, 0.17329, 0.141329, + 0.180619, 0.160061, 0.192569, 0.270999, 0.117901, 0.362693, 0.217561, 0.208975, 0.233658, + 0.175173, 1.10307, 0.14625, 1.31124, 0.237608, 0.286784, 0.325112, 0.2485, 0.259641, + 0.553152, 0.179039, 0.780781, 0.174758, 0.297824, 0.2558, 0.235949, 0.952186, 0.356744, + 0.312646, 0.189362, 0.574524, 0.705204, 0.213168, 0.225956, 0.424165, 0.169506, 0.137109, + 0.352451, 0.454554, 0.653302, 0.31261, 0.194412, 0.23719, 0.137886, 0.31498, 0.199085, + 0.203875, 0.597248, 1.10036, 0.196869, 0.22104, 0.451345, 0.105613, 0.683928, 0.135204, + 0.25533, 0.607871, 0.219724, 0.184464, 0.725001, 0.160061, 0.333407, 0.192569, 0.234147, + 0.47178, 0.161815, 0.242455, 0.215305, 0.410575, 0.242376, 0.211335, 0.462804, 0.275065, + 0.126878, 0.170404, 0.179433, 0.147244, 0.109584, 0.352905, 0.158215, 0.197604, 0.172407, + 0.407506, 0.645446, 0.313061, 0.165602, 0.136663, 0.55444, 0.15527, 0.133128, 0.125912, + 0.340405, 0.44521, 0.122783, 0.814526, 0.243773, 0.15743, 0.266743, 0.684458, 0.22221, + 0.181294, 0.193901, 0.258802, 0.167195, 0.292056, 0.132309, 0.227671, 0.117334, 0.271758, + 0.146185, 0.225042, 0.225964, 0.194863, 0.290274, 0.138438, 0.196714, 0.266012, 0.267771, + 0.162544, 0.244258, 0.358038, 0.522617, 0.192875, 0.45066, 0.330396, 0.223477, 0.42967, + 0.350884, 0.404655, 0.123155, 0.431583, 0.191675, 0.147354, 0.609034, 0.459487, 0.187337, + 0.215128, 0.604169, 0.330165, 0.494229, 0.40775, 0.167377, 0.192648, 0.234635, 0.275578, + 0.253094, 0.420063, 0.228299, 0.206478, 0.20395, 0.377656, 0.317393, 0.478623, 0.159009, + 0.217034, 0.300933, 0.139754, 0.153901, 0.261077, 0.22834, 0.449609, 0.157672, 0.176474, + 0.285704, 0.180186, 0.212738, 0.266428, 0.388313, 0.0954637, 0.298093, 0.251643, 0.330696, + 0.159572, 0.210666, 0.149411, 0.139618, 0.338472, 0.450304, 0.208793, 0.583609, 0.185865, + 0.400576, 0.21626, 0.174867, 0.239144, 0.249113, 0.200402, 0.275065, 0.238793, 0.205784, + 0.4475, 0.231262, 0.259082, 0.20934, 0.16806, 0.193616, 0.213811, 0.395632, 0.482465, + 0.274649, 0.307405, 0.165866, 0.334275, 0.683337, 0.368825, 0.14625, 0.780742, 0.163457, + 0.226596, 0.138713, 1.79155, 0.400443, 0.233658, 0.426399, 0.623024, 0.670955, 0.123588, + 0.110899, 0.173751, 0.651068, 0.199983, 0.190887, 0.541435, 0.21324, 0.266571, 0.134638, + 0.179348, 0.145636, 0.170929, 0.623252, 0.587738, 0.109688, 0.515314, 0.217666, 0.213311, + 0.249144, 0.187947, 0.270999, 0.268311, 0.469782, 0.763609, 0.32124, 0.146315, 0.265223, + 0.298694, 0.197623, 0.21349, 0.845778, 0.175466, 0.123588, 0.17223, 0.258603, 1.17119, + 0.538142, 0.407675, 0.120288, 0.587238, 0.244664, 0.333956, 0.132812, 0.21399, 0.302375, + 0.275882, 0.134284, 0.377555, 0.228541, 0.187307, 0.143804, 0.180545, 0.222451, 0.239638, + 0.188028, 0.46334, 0.175868, 0.242392, 0.314762, 0.44473, 0.21962, 0.175966, 1.12364, + 0.138837, 0.400576, 0.18184, 0.137706, 0.409763, 0.216894, 0.466662, 0.376604, 0.487155, + 0.283143, 0.118547, 0.221591, 0.122783, 0.179007, 0.16628, 0.180999, 0.239845, 0.169607, + 0.578402, 0.396537, 0.222288, 0.563237, 0.371238, 0.138658, 0.324336, 0.191526, 0.168603, + 0.357715, 0.640905, 0.460706, 0.220902, 0.240797, 0.164062, 0.157853, 0.34457, 0.196092, + 0.289353, 0.104597, 0.259641, 0.126878, 0.175781, 0.441458, 0.820108, 0.261864, 0.23431, + 0.254506, 0.271955, 0.227529, 0.22834, 0.196753, 0.224906, 0.193783, 0.419481, 0.236933, + 0.229706, 0.29785, 0.222947, 0.177606, 0.216911, 0.305188, 0.933438, 0.116666, 0.278483, + 0.0973824, 0.271224, 0.127717, 1.28139, 0.276283, 0.180704, 0.234554, 0.285984, 0.290172, + 0.49594, 0.135879, 0.436784, 0.206219, 0.342215, 0.374165, 0.182217, 0.274864, 0.625, + 0.356925, 0.194324, 0.342215, 0.113012, 0.155123, 0.254207, 0.438919, 0.262548, 0.302299, + 0.179528, 0.312744, 0.168513, 0.142618, 0.150543, 0.231361, 0.166004, 0.186725, 0.38848, + 0.179857, 0.182301, 0.629476, 0.44113, 0.289669, 0.328543, 0.279938, 0.14625, 0.187174, + 0.157635, 0.396749, 0.798931, 0.201541, 0.778619, 0.265883, 0.258027, 0.218576, 0.266571, + 0.160168, 0.230303, 0.273633, 0.233298, 0.30175, 0.217069, 0.345145, 0.397901, 0.224499, + 0.248101, 0.241335, 0.222947, 0.237094, 0.176518, 0.380032, 0.634775, 0.426193, 0.16362, + 0.231097, 0.219898, 0.343789, 0.275578, 0.282022, 0.628542, 0.232184, 0.848367, 0.200754, + 0.179177}, + {0, 0, 2, 3, 3, 0, 2, 2, 2, 2, 3, 0, 3, 2, 2, 2, 3, 3, 3, 3, 2, 0, 0, 0, 2, 3, 3, 3, 2, 2, 0, 0, + 2, 3, 3, 0, 0, 2, 0, 0, 3, 2, 3, 0, 3, 0, 3, 3, 0, 2, 0, 3, 2, 0, 3, 0, 3, 3, 3, 2, 2, 3, 0, 0, + 3, 3, 0, 2, 2, 3, 0, 3, 2, 2, 2, 0, 2, 3, 3, 3, 2, 3, 3, 3, 2, 0, 2, 0, 3, 3, 3, 3, 2, 2, 0, 2, + 0, 3, 2, 2, 2, 0, 0, 3, 0, 2, 2, 3, 2, 3, 0, 2, 2, 2, 3, 2, 0, 0, 2, 3, 3, 2, 0, 2, 0, 0, 2, 0, + 2, 2, 3, 2, 2, 0, 3, 0, 3, 2, 2, 2, 3, 3, 0, 0, 0, 3, 2, 3, 3, 3, 3, 0, 2, 0, 3, 2, 3, 2, 3, 0, + 2, 3, 3, 2, 3, 3, 2, 2, 0, 0, 2, 3, 3, 2, 3, 0, 2, 0, 2, 0, 3, 2, 3, 2, 3, 0, 3, 0, 3, 0, 2, 3, + 2, 2, 3, 0, 2, 2, 2, 0, 3, 2, 3, 3, 2, 3, 2, 3, 3, 2, 2, 0, 0, 2, 2, 3, 0, 3, 0, 2, 0, 0, 2, 3, + 0, 3, 3, 2, 0, 3, 3, 0, 3, 0, 2, 2, 0, 2, 0, 2, 0, 0, 0, 2, 0, 3, 2, 3, 2, 3, 2, 2, 0, 2, 3, 2, + 3, 2, 2, 2, 2, 3, 0, 2, 0, 0, 2, 3, 3, 0, 2, 3, 2, 2, 3, 0, 3, 0, 0, 2, 0, 2, 0, 2, 2, 3, 3, 2, + 3, 0, 0, 3, 2, 2, 0, 3, 2, 0, 0, 3, 0, 0, 2, 0, 3, 2, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 3, 0, 0, + 2, 0, 0, 2, 0, 2, 3, 2, 3, 3, 2, 2, 0, 0, 0, 3, 0, 2, 0, 2, 0, 2, 2, 2, 3, 3, 0, 0, 3, 3, 3, 3, + 3, 2, 3, 3, 2, 3, 3, 0, 2, 2, 2, 2, 0, 2, 0, 0, 0, 2, 2, 3, 3, 2, 3, 2, 3, 0, 2, 3, 0, 2, 0, 2, + 2, 0, 3, 0, 2, 0, 2, 3, 0, 3, 0, 0, 0, 3, 2, 3, 3, 0, 3, 2, 3, 0, 2, 3, 3, 0, 2, 3, 0, 0, 0, 2, + 0, 3, 0, 2, 3, 3, 3, 3, 3, 0, 2, 0, 2, 2, 3, 3, 0, 3, 0, 2, 0, 2, 0, 3, 0, 0, 0, 2, 3, 3, 2, 3, + 0, 0, 0, 0, 3, 3, 0, 3, 2, 0, 2, 3, 2, 2, 3, 3, 2, 2, 2, 0, 2, 3, 0, 3, 3, 0, 0, 2, 0, 3, 2, 3, + 0, 2, 0, 2, 2, 3, 2, 0, 3, 3, 3, 2, 3, 0, 3, 0, 2, 2, 0, 0, 0, 3, 0, 3, 3, 2, 3, 2, 3, 2, 3, 0, + 2, 3, 0, 2, 0, 3, 3, 3, 3, 3, 3, 2, 0, 3, 2, 2, 2, 3, 3, 2, 3, 0, 2, 3, 3, 2, 2, 0, 0, 0, 0, 3, + 0, 3, 3, 3, 0, 0, 0, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 3, 3, 0, 0, 2, 2, 3, 3, 2, 2, 0, 0, 3, 0, + 0, 0, 2, 3, 0, 0, 0, 3, 0, 3, 0, 2, 2, 0, 0, 0, 0, 3, 2, 2, 3, 2, 3, 2, 2, 2, 2, 3, 0, 0, 2, 3, + 0, 3, 3, 0, 3, 0, 0, 2, 0, 3, 3, 0, 2, 2, 3, 3, 0, 0, 2, 0, 2, 3, 2, 0, 0, 3, 3, 0, 3, 2, 0, 2, + 0, 2, 3, 2, 0, 3, 3, 2, 0, 0, 2, 2, 0, 0, 2, 0, 3, 3, 2, 3, 2, 0, 3, 0, 2, 2, 3, 3, 0, 3, 2, 2, + 0, 3, 0, 0, 0, 2, 0, 3, 2, 0, 2, 3, 2, 3, 2, 2, 3, 3, 0, 2, 3, 2, 3, 2, 2, 0, 3, 0, 3, 0, 2, 2, + 2, 0, 2, 0, 2, 2, 0, 0, 3, 3, 0, 0, 3, 2, 0, 2, 3, 2, 2, 0, 3, 3, 0, 2, 0, 3, 3, 0, 2, 3, 2, 3, + 2, 0, 2, 2, 0, 0, 0, 2, 2, 3, 3, 2, 2, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 2, 0, 3, 3, + 3, 0, 2, 0, 2, 3, 2, 0, 3, 3, 2, 0, 2, 0, 3, 2, 0, 3, 0, 0, 2, 2, 0, 3, 0, 2, 3, 3, 3, 0, 2, 0, + 0, 3, 0, 2, 3, 2, 2, 0, 3, 3, 3, 3, 3, 0, 3, 0, 0, 0, 0, 3, 2, 0, 0, 2, 3, 3, 2, 2, 0, 3, 2, 0, + 3, 0, 2, 3, 3, 0, 2, 2, 3, 2, 2, 2, 3, 2, 0, 0, 3, 2, 0, 0, 0, 2, 0, 2, 0, 0, 2, 2, 3, 0, 3, 0, + 0, 3, 0, 0, 0, 3, 0, 0, 2, 2, 0, 2, 2, 3, 3, 3, 3, 0, 0, 2, 2, 2, 0, 3, 2, 2, 2, 2, 2, 0, 3, 0, + 0, 3, 2, 0, 0, 3, 2, 3, 3, 0, 3, 0, 3, 0, 3, 2, 2, 2, 0, 0, 3, 2, 2, 0, 0, 0, 2, 3, 2, 0, 2, 3, + 3, 3, 0, 3, 3, 0, 2, 0, 0, 2, 3, 3, 0, 3, 2, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, 2, 3, 0, 3, 3, 0, 3, + 2, 2, 0, 2, 0, 3, 0, 3, 0, 2, 3, 0, 2, 3, 2, 0, 2, 0, 3, 0, 2, 3, 3, 2, 0, 3, 3, 3, 2, 2, 3, 3, + 2, 2, 2, 0, 3, 2, 2, 0}, + {271, 271, 329, 343, 387, 426, 426, 601}, + {426, 601, 426, 387, 343, 271, 329, 271}, + {3.70991, 4.43491, 3.76334, 9.43944, 9.43944, 3.70991, 3.76334, 4.43491}}}; typedef ConnectComponentsEdgesTest ConnectComponentsEdgesTestF_Int; -TEST_P(ConnectComponentsEdgesTestF_Int, Result) -{ - EXPECT_TRUE(true); -} +TEST_P(ConnectComponentsEdgesTestF_Int, Result) { EXPECT_TRUE(true); } INSTANTIATE_TEST_CASE_P(ConnectComponentsEdgesTest, ConnectComponentsEdgesTestF_Int, From e3dbdb00044d830baad32242c43ed790453968e0 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 5 Jun 2023 09:20:38 -0700 Subject: [PATCH 29/53] Remove unnecessary imports --- cpp/include/raft/matrix/detail/scatter.cuh | 2 -- cpp/include/raft/matrix/scatter.cuh | 2 +- .../sparse/neighbors/detail/connect_components.cuh | 4 +--- cpp/test/matrix/gather.cu | 2 -- cpp/test/matrix/scatter.cu | 2 -- cpp/test/sparse/neighbors/connect_components.cu | 11 ++--------- 6 files changed, 4 insertions(+), 19 deletions(-) diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter.cuh index 2b29587c58..b2f25dbb2d 100644 --- a/cpp/include/raft/matrix/detail/scatter.cuh +++ b/cpp/include/raft/matrix/detail/scatter.cuh @@ -15,13 +15,11 @@ */ #pragma once -#include "raft/core/resource/cuda_stream.hpp" #include #include #include #include #include -#include #include #include diff --git a/cpp/include/raft/matrix/scatter.cuh b/cpp/include/raft/matrix/scatter.cuh index 849a5c7409..6b09d9486d 100644 --- a/cpp/include/raft/matrix/scatter.cuh +++ b/cpp/include/raft/matrix/scatter.cuh @@ -17,7 +17,7 @@ #pragma once #include -#include +#include #include namespace raft::matrix { diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 8d9e1eef58..39ee203fa5 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include @@ -38,8 +38,6 @@ #include #include -#include - #include #include #include diff --git a/cpp/test/matrix/gather.cu b/cpp/test/matrix/gather.cu index dcc4e81c6d..c9f3f727ef 100644 --- a/cpp/test/matrix/gather.cu +++ b/cpp/test/matrix/gather.cu @@ -15,8 +15,6 @@ */ #include "../test_utils.cuh" -#include "raft/core/logger-macros.hpp" -#include "raft/util/cudart_utils.hpp" #include #include #include diff --git a/cpp/test/matrix/scatter.cu b/cpp/test/matrix/scatter.cu index 4bc09226ae..195df07203 100644 --- a/cpp/test/matrix/scatter.cu +++ b/cpp/test/matrix/scatter.cu @@ -18,11 +18,9 @@ #include #include #include -#include #include #include #include -#include #include #include #include diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index b4a2c2b344..baa4e37b65 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -494,15 +494,8 @@ class ConnectComponentsEdgesTest MutualReachabilityFixConnectivitiesRedOp red_op(core_dists.data(), params.n_row); - raft::linkage::connect_components(handle, - out_edges, - data.data(), - colors.data(), - params.n_row, - params.n_col, - red_op, - params.n_row, - params.n_col); + raft::linkage::connect_components( + handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, red_op, 13, 1); ASSERT_TRUE( devArrMatch(out_edges.rows(), params.expected_rows.data(), out_edges.nnz, Compare())); From 6ae1081069ac3319da820304f657bd0ad7b7b9ce Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 9 Jun 2023 13:24:13 -0700 Subject: [PATCH 30/53] some updates after pr reviews --- cpp/include/raft/matrix/batched_rearrange.cuh | 92 ------------------- cpp/include/raft/matrix/detail/gather.cuh | 11 ++- 2 files changed, 8 insertions(+), 95 deletions(-) delete mode 100644 cpp/include/raft/matrix/batched_rearrange.cuh diff --git a/cpp/include/raft/matrix/batched_rearrange.cuh b/cpp/include/raft/matrix/batched_rearrange.cuh deleted file mode 100644 index 5faecbc370..0000000000 --- a/cpp/include/raft/matrix/batched_rearrange.cuh +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include - -namespace raft { -namespace matrix { - -/** - * @brief In-place gather elements in a row-major matrix according to a - * map. The length of the map is equal to the number of rows. - * Batching is done on columns and an additional scratch space of - * shape n_rows * cols_batch_size is created. For each batch, chunks - * of columns from each row are copied into the appropriate location - * in the scratch space and copied back to the corresponding locations - * in the input matrix - * - * @tparam value_idx - * @tparam value_t - * - * @param[in] handle raft handle - * @param[inout] in input matrix (n_rows * n_cols) - * @param[in] map map containing the order in which rows are to be rearranged (n_rows) - * @param[in] col_batch_size column batch size - */ -template -void batched_gather(raft::device_resources const& handle, - raft::device_matrix_view in, - raft::device_vector_view map, - size_t col_batch_size) -{ - IdxT m = in.extent(0); - IdxT n = in.extent(1); - IdxT map_len = map.extent(0); - RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= (size_t)n, - "col_batch_size should be > 0 and <= n"); - RAFT_EXPECTS(map_len == m, "size of map should be equal to the number of rows in input matrix"); - - detail::batched_gather(handle, in.data_handle(), map.data_handle(), n, m, col_batch_size); -} - -/** - * @brief In-place scatter elements in a row-major matrix according to a - * map. The length of the map is equal to the number of rows. - * Batching is done on columns and an additional scratch space of - * shape n_rows * cols_batch_size is created. For each batch, chunks - * of columns from each row are copied into the appropriate location - * in the scratch space and copied back to the corresponding locations - * in the input matrix - * - * @tparam value_idx - * @tparam value_t - * - * @param[in] handle raft handle - * @param[inout] in input matrix (n_rows * n_cols) - * @param[in] map map containing destination index of each row (n_rows) - * @param[in] col_batch_size column batch size - */ -template -void batched_scatter(raft::device_resources const& handle, - raft::device_matrix_view in, - raft::device_vector_view map, - size_t col_batch_size) -{ - IdxT m = in.extent(0); - IdxT n = in.extent(1); - IdxT map_len = map.extent(0); - RAFT_EXPECTS(0 < col_batch_size && col_batch_size <= (size_t)n, - "col_batch_size should be > 0 and <= n"); - RAFT_EXPECTS(map_len == m, "size of map should be equal to the number of rows in input matrix"); - - detail::batched_scatter(handle, in.data_handle(), map.data_handle(), n, m, col_batch_size); -} -}; // end namespace matrix -}; // end namespace raft \ No newline at end of file diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 71def5db58..395f32cac5 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -377,12 +377,14 @@ void gatherInplaceImpl(raft::resources const& handle, RAFT_EXPECTS(batch_size <= n, "batch size should be <= number of columns"); auto exec_policy = resource::get_thrust_policy(handle); + IndexT n_batches = raft::ceildiv(n, batch_size); + + auto scratch_space = raft::make_device_vector(handle, m * batch_size); + for (IndexT bid = 0; bid < n_batches; bid++) { IndexT batch_offset = bid * batch_size; IndexT cols_per_batch = min(batch_size, n - batch_offset); - auto scratch_space = - raft::make_device_vector(handle, map_length * cols_per_batch); auto gather_op = [inout = inout.data_handle(), map = map.data_handle(), @@ -398,7 +400,10 @@ void gatherInplaceImpl(raft::resources const& handle, IndexT i_src = transform_op(map_val); return inout[i_src * n + batch_offset + col]; }; - raft::linalg::map_offset(handle, scratch_space.view(), gather_op); + raft::linalg::map_offset( + handle, + raft::make_device_vector_view(scratch_space.data_handle(), m * cols_per_batch), + gather_op); auto copy_op = [inout = inout.data_handle(), map = map.data_handle(), From b7be24cc2dcd2c61a677689db21f30d7d975c3d0 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 12 Jun 2023 17:40:46 -0700 Subject: [PATCH 31/53] Updates after PR reviews --- cpp/include/raft/matrix/detail/gather.cuh | 97 -------------- .../raft/matrix/detail/gather_inplace.cuh | 121 ++++++++++++++++++ .../{scatter.cuh => scatter_inplace.cuh} | 6 +- cpp/include/raft/matrix/gather.cuh | 1 + cpp/include/raft/matrix/scatter.cuh | 2 +- .../neighbors/detail/connect_components.cuh | 8 +- 6 files changed, 130 insertions(+), 105 deletions(-) create mode 100644 cpp/include/raft/matrix/detail/gather_inplace.cuh rename cpp/include/raft/matrix/detail/{scatter.cuh => scatter_inplace.cuh} (97%) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 395f32cac5..fc358b4807 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -16,14 +16,8 @@ #pragma once -#include #include -#include -#include -#include #include -#include -#include namespace raft { namespace matrix { @@ -350,97 +344,6 @@ void gather_if(const InputIteratorT in, gatherImpl(in, D, N, map, stencil, map_length, out, pred_op, transform_op, stream); } -template -void gatherInplaceImpl(raft::resources const& handle, - raft::device_matrix_view inout, - raft::device_vector_view map, - MapTransformOp transform_op, - IndexT batch_size) -{ - // return type of MapTransformOp, must be convertible to IndexT - typedef typename std::result_of::type MapTransformOpReturnT; - RAFT_EXPECTS((std::is_convertible::value), - "MapTransformOp's result type must be convertible to signed integer"); - - IndexT m = inout.extent(0); - IndexT n = inout.extent(1); - IndexT map_length = map.extent(0); - - // skip in case of 0 length input - if (map_length <= 0 || m <= 0 || n <= 0 || batch_size < 0) return; - - RAFT_EXPECTS(map_length <= m, "Length of map should be <= number of rows for inplace gather"); - - // re-assign batch_size for default case - if (batch_size == 0) batch_size = n; - - RAFT_EXPECTS(batch_size <= n, "batch size should be <= number of columns"); - - auto exec_policy = resource::get_thrust_policy(handle); - - IndexT n_batches = raft::ceildiv(n, batch_size); - - auto scratch_space = raft::make_device_vector(handle, m * batch_size); - - for (IndexT bid = 0; bid < n_batches; bid++) { - IndexT batch_offset = bid * batch_size; - IndexT cols_per_batch = min(batch_size, n - batch_offset); - - auto gather_op = [inout = inout.data_handle(), - map = map.data_handle(), - transform_op, - batch_offset, - map_length, - cols_per_batch = raft::util::FastIntDiv(cols_per_batch), - n] __device__(auto idx) { - IndexT row = idx / cols_per_batch; - IndexT col = idx % cols_per_batch; - MapT map_val = map[row]; - - IndexT i_src = transform_op(map_val); - return inout[i_src * n + batch_offset + col]; - }; - raft::linalg::map_offset( - handle, - raft::make_device_vector_view(scratch_space.data_handle(), m * cols_per_batch), - gather_op); - - auto copy_op = [inout = inout.data_handle(), - map = map.data_handle(), - scratch_space = scratch_space.data_handle(), - batch_offset, - map_length, - cols_per_batch = raft::util::FastIntDiv(cols_per_batch), - n] __device__(auto idx) { - IndexT row = idx / cols_per_batch; - IndexT col = idx % cols_per_batch; - inout[row * n + batch_offset + col] = scratch_space[idx]; - return; - }; - auto counting = thrust::make_counting_iterator(0); - thrust::for_each(exec_policy, counting, counting + map_length * cols_per_batch, copy_op); - } -} - -template -void gather(raft::resources const& handle, - raft::device_matrix_view inout, - raft::device_vector_view map, - MapTransformOp transform_op, - IndexT batch_size) -{ - gatherInplaceImpl(handle, inout, map, transform_op, batch_size); -} - -template -void gather(raft::resources const& handle, - raft::device_matrix_view inout, - raft::device_vector_view map, - IndexT batch_size) -{ - gatherInplaceImpl(handle, inout, map, raft::identity_op(), batch_size); -} - } // namespace detail } // namespace matrix } // namespace raft diff --git a/cpp/include/raft/matrix/detail/gather_inplace.cuh b/cpp/include/raft/matrix/detail/gather_inplace.cuh new file mode 100644 index 0000000000..4f8fe2ffb7 --- /dev/null +++ b/cpp/include/raft/matrix/detail/gather_inplace.cuh @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +namespace raft { +namespace matrix { +namespace detail { + +#pragma once +template +void gatherInplaceImpl(raft::resources const& handle, + raft::device_matrix_view inout, + raft::device_vector_view map, + MapTransformOp transform_op, + IndexT batch_size) +{ + // return type of MapTransformOp, must be convertible to IndexT + typedef typename std::result_of::type MapTransformOpReturnT; + RAFT_EXPECTS((std::is_convertible::value), + "MapTransformOp's result type must be convertible to signed integer"); + + IndexT m = inout.extent(0); + IndexT n = inout.extent(1); + IndexT map_length = map.extent(0); + + // skip in case of 0 length input + if (map_length <= 0 || m <= 0 || n <= 0 || batch_size < 0) return; + + RAFT_EXPECTS(map_length <= m, "Length of map should be <= number of rows for inplace gather"); + + RAFT_EXPECTS(batch_size >= 0, "batch size should be >= 0"); + + // re-assign batch_size for default case + if (batch_size == 0 || batch_size > n) batch_size = n; + + auto exec_policy = resource::get_thrust_policy(handle); + + IndexT n_batches = raft::ceildiv(n, batch_size); + + auto scratch_space = raft::make_device_vector(handle, map_length * batch_size); + + for (IndexT bid = 0; bid < n_batches; bid++) { + IndexT batch_offset = bid * batch_size; + IndexT cols_per_batch = min(batch_size, n - batch_offset); + + auto gather_op = [inout = inout.data_handle(), + map = map.data_handle(), + transform_op, + batch_offset, + map_length, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + n] __device__(auto idx) { + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; + MapT map_val = map[row]; + + IndexT i_src = transform_op(map_val); + return inout[i_src * n + batch_offset + col]; + }; + raft::linalg::map_offset( + handle, + raft::make_device_vector_view(scratch_space.data_handle(), map_length * cols_per_batch), + gather_op); + + auto copy_op = [inout = inout.data_handle(), + map = map.data_handle(), + scratch_space = scratch_space.data_handle(), + batch_offset, + map_length, + cols_per_batch = raft::util::FastIntDiv(cols_per_batch), + n] __device__(auto idx) { + IndexT row = idx / cols_per_batch; + IndexT col = idx % cols_per_batch; + inout[row * n + batch_offset + col] = scratch_space[idx]; + return; + }; + auto counting = thrust::make_counting_iterator(0); + thrust::for_each(exec_policy, counting, counting + map_length * cols_per_batch, copy_op); + } +} + +template +void gather(raft::resources const& handle, + raft::device_matrix_view inout, + raft::device_vector_view map, + MapTransformOp transform_op, + IndexT batch_size) +{ + gatherInplaceImpl(handle, inout, map, transform_op, batch_size); +} + +template +void gather(raft::resources const& handle, + raft::device_matrix_view inout, + raft::device_vector_view map, + IndexT batch_size) +{ + gatherInplaceImpl(handle, inout, map, raft::identity_op(), batch_size); +} + +} // namespace detail +} // namespace matrix +} // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/matrix/detail/scatter.cuh b/cpp/include/raft/matrix/detail/scatter_inplace.cuh similarity index 97% rename from cpp/include/raft/matrix/detail/scatter.cuh rename to cpp/include/raft/matrix/detail/scatter_inplace.cuh index b2f25dbb2d..3a57c5478b 100644 --- a/cpp/include/raft/matrix/detail/scatter.cuh +++ b/cpp/include/raft/matrix/detail/scatter_inplace.cuh @@ -66,10 +66,10 @@ void scatterInplaceImpl( RAFT_EXPECTS(map_length == m, "Length of map should be equal to number of rows for inplace scatter"); - // re-assign batch_size for default case - if (batch_size == 0) batch_size = n; + RAFT_EXPECTS(batch_size >= 0, "batch size should be >= 0"); - RAFT_EXPECTS(batch_size <= n, "batch size should be <= number of columns"); + // re-assign batch_size for default case + if (batch_size == 0 || batch_size > n) batch_size = n; auto exec_policy = resource::get_thrust_policy(handle); diff --git a/cpp/include/raft/matrix/gather.cuh b/cpp/include/raft/matrix/gather.cuh index b07694445f..2fbbcfa2bb 100644 --- a/cpp/include/raft/matrix/gather.cuh +++ b/cpp/include/raft/matrix/gather.cuh @@ -20,6 +20,7 @@ #include #include #include +#include #include namespace raft::matrix { diff --git a/cpp/include/raft/matrix/scatter.cuh b/cpp/include/raft/matrix/scatter.cuh index 6b09d9486d..cd2d76a863 100644 --- a/cpp/include/raft/matrix/scatter.cuh +++ b/cpp/include/raft/matrix/scatter.cuh @@ -18,7 +18,7 @@ #include #include -#include +#include namespace raft::matrix { /** diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 39ee203fa5..ea4f3f78c9 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -58,10 +58,7 @@ namespace raft::sparse::neighbors::detail { /** * Functor with reduction ops for performing masked 1-nn - * computation. this change introduces a breaking change to - * the public API because colors are no longer a part of this - * op. The connect_components function internally ensures that - * only cross-component nearest neighbors are found. + * computation. * @tparam value_idx * @tparam value_t */ @@ -106,6 +103,9 @@ struct FixConnectivitiesRedOp { DI value_t get_value(value_t& out) const { return out; } + // Gather and scatter are necessary because this functor is used in connect_components, which + // rearranges the data internally. The gather and scatter ensure that operator() is still + // consistent after rearranging. void gather(const raft::resources& handle, value_idx* map) { auto tmp_colors = raft::make_device_vector(handle, m); From 7f4f9f361e811c824211b678dd4ade2a81c899a0 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Tue, 13 Jun 2023 12:00:14 -0700 Subject: [PATCH 32/53] Updates after PR reviews --- .../raft/matrix/detail/gather_inplace.cuh | 6 +- .../neighbors/detail/connect_components.cuh | 6 +- .../sparse/neighbors/connect_components.cu | 86 ++++++++++++++++--- 3 files changed, 85 insertions(+), 13 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather_inplace.cuh b/cpp/include/raft/matrix/detail/gather_inplace.cuh index 4f8fe2ffb7..7b4958e109 100644 --- a/cpp/include/raft/matrix/detail/gather_inplace.cuh +++ b/cpp/include/raft/matrix/detail/gather_inplace.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ namespace matrix { namespace detail { #pragma once + template void gatherInplaceImpl(raft::resources const& handle, raft::device_matrix_view inout, @@ -33,6 +34,9 @@ void gatherInplaceImpl(raft::resources const& handle, IndexT batch_size) { // return type of MapTransformOp, must be convertible to IndexT + // TODO (tarang-jain): Use cuda::std::result_of here to ensure that the return type of a + // device function is being correctly obtained. Reference: + // https://github.com/rapidsai/raft/pull/1445. typedef typename std::result_of::type MapTransformOpReturnT; RAFT_EXPECTS((std::is_convertible::value), "MapTransformOp's result type must be convertible to signed integer"); diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index ea4f3f78c9..f184d3a350 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -216,7 +216,7 @@ struct LookupColorOp { * @param[in] n_rows number of rows in original dense data * @param[in] n_cols number of columns in original dense data * @param[in] row_batch_size row batch size for computing nearest neighbors - & @param[in] col_batch_size column batch size for sorting and 'unsorting' + * @param[in] col_batch_size column batch size for sorting and 'unsorting' * @param[in] reduction_op reduction operation for computing nearest neighbors */ template @@ -320,6 +320,7 @@ void perform_1nn(raft::resources const& handle, kvp_view); } + // Transform the keys so that they correctly point to the unpermuted indices. thrust::transform(exec_policy, kvp, kvp + n_rows, @@ -331,8 +332,11 @@ void perform_1nn(raft::resources const& handle, return res; }); + // Undo permutation of the rows of X by scattering in place. raft::matrix::scatter(handle, X_mutable_view, sort_plan_const_view, (value_idx)col_batch_size); + // Undo permutation of the key-value pair and color vectors. This is not done + // inplace, so using two temporary vectors. auto tmp_colors = raft::make_device_vector(handle, n_rows); auto tmp_kvp = raft::make_device_vector(handle, n_rows); diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index baa4e37b65..d786caaeff 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -73,6 +73,7 @@ class ConnectComponentsTest params = ::testing::TestWithParam>::GetParam(); raft::sparse::COO out_edges(resource::get_cuda_stream(handle)); + raft::sparse::COO out_edges_batched(resource::get_cuda_stream(handle)); rmm::device_uvector data(params.n_row * params.n_col, resource::get_cuda_stream(handle)); @@ -127,6 +128,27 @@ class ConnectComponentsTest params.n_row, params.n_col); + raft::linkage::connect_components(handle, + out_edges_batched, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + params.n_row / 2, + params.n_col / 2); + + ASSERT_TRUE(out_edges.nnz == out_edges_batched.nnz); + + ASSERT_TRUE( + devArrMatch(out_edges.rows(), out_edges_batched.rows(), out_edges.nnz, Compare())); + + ASSERT_TRUE( + devArrMatch(out_edges.cols(), out_edges_batched.cols(), out_edges.nnz, Compare())); + + ASSERT_TRUE(devArrMatch( + out_edges.vals(), out_edges_batched.vals(), out_edges.nnz, CompareApprox(1e-4))); + /** * Construct final edge list */ @@ -473,7 +495,8 @@ class ConnectComponentsEdgesTest params = ::testing::TestWithParam< ConnectComponentsMutualReachabilityInputs>::GetParam(); - raft::sparse::COO out_edges(resource::get_cuda_stream(handle)); + raft::sparse::COO out_edges_unbatched(resource::get_cuda_stream(handle)); + raft::sparse::COO out_edges_batched(resource::get_cuda_stream(handle)); rmm::device_uvector data(params.n_row * params.n_col, resource::get_cuda_stream(handle)); @@ -494,17 +517,58 @@ class ConnectComponentsEdgesTest MutualReachabilityFixConnectivitiesRedOp red_op(core_dists.data(), params.n_row); - raft::linkage::connect_components( - handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, red_op, 13, 1); - - ASSERT_TRUE( - devArrMatch(out_edges.rows(), params.expected_rows.data(), out_edges.nnz, Compare())); - - ASSERT_TRUE( - devArrMatch(out_edges.cols(), params.expected_cols.data(), out_edges.nnz, Compare())); + raft::linkage::connect_components(handle, + out_edges_unbatched, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + params.n_row, + params.n_col); - ASSERT_TRUE(devArrMatch( - out_edges.vals(), params.expected_vals.data(), out_edges.nnz, CompareApprox(1e-4))); + raft::linkage::connect_components(handle, + out_edges_unbatched, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + 11, + 1); + + ASSERT_TRUE(out_edges_unbatched.nnz == out_edges_batched.nnz && + out_edges_unbatched.nnz == params.expected_rows.size()); + + ASSERT_TRUE(devArrMatch(out_edges_unbatched.rows(), + params.expected_rows.data(), + out_edges_unbatched.nnz, + Compare())); + + ASSERT_TRUE(devArrMatch(out_edges_unbatched.cols(), + params.expected_cols.data(), + out_edges_unbatched.nnz, + Compare())); + + ASSERT_TRUE(devArrMatch(out_edges_unbatched.vals(), + params.expected_vals.data(), + out_edges_unbatched.nnz, + CompareApprox(1e-4))); + + ASSERT_TRUE(devArrMatch(out_edges_batched.rows(), + params.expected_rows.data(), + out_edges_batched.nnz, + Compare())); + + ASSERT_TRUE(devArrMatch(out_edges_batched.cols(), + params.expected_cols.data(), + out_edges_batched.nnz, + Compare())); + + ASSERT_TRUE(devArrMatch(out_edges_batched.vals(), + params.expected_vals.data(), + out_edges_batched.nnz, + CompareApprox(1e-4))); } void SetUp() override { basicTest(); } From be74d606c2122719b1055278f7cd1e049ff0ed81 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Tue, 13 Jun 2023 14:44:58 -0700 Subject: [PATCH 33/53] nit --- cpp/include/raft/matrix/detail/gather_inplace.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather_inplace.cuh b/cpp/include/raft/matrix/detail/gather_inplace.cuh index 7b4958e109..2869080205 100644 --- a/cpp/include/raft/matrix/detail/gather_inplace.cuh +++ b/cpp/include/raft/matrix/detail/gather_inplace.cuh @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#pragma once #include #include @@ -24,8 +25,6 @@ namespace raft { namespace matrix { namespace detail { -#pragma once - template void gatherInplaceImpl(raft::resources const& handle, raft::device_matrix_view inout, From b94844bf9f339f597fc04e6c55c5550b326ab8c7 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Wed, 14 Jun 2023 12:16:51 -0700 Subject: [PATCH 34/53] Resolve typos --- .../raft/sparse/neighbors/detail/connect_components.cuh | 4 ---- cpp/test/sparse/neighbors/connect_components.cu | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index f184d3a350..a9e71cfb08 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -348,10 +348,6 @@ void perform_1nn(raft::resources const& handle, raft::copy_async(colors, tmp_colors.data_handle(), n_rows, stream); raft::copy_async(kvp, tmp_kvp.data_handle(), n_rows, stream); - auto keys = raft::make_device_vector(handle, n_rows); - raft::linalg::map_offset( - handle, keys.view(), [kvp] __device__(auto idx) { return kvp[idx].key; }); - LookupColorOp extract_colors_op(colors); thrust::transform(exec_policy, kvp, kvp + n_rows, nn_colors, extract_colors_op); } diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index d786caaeff..c61c6b0755 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -528,7 +528,7 @@ class ConnectComponentsEdgesTest params.n_col); raft::linkage::connect_components(handle, - out_edges_unbatched, + out_edges_batched, data.data(), colors.data(), params.n_row, From 8df0e00422c2f386bfe4a499f956ea5531f58b33 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Tue, 20 Jun 2023 16:23:02 -0700 Subject: [PATCH 35/53] add libcudacxx dependency --- cpp/CMakeLists.txt | 2 ++ cpp/cmake/thirdparty/get_libcudacxx.cmake | 23 +++++++++++++++++++ .../raft/cluster/detail/single_linkage.cuh | 2 +- .../raft/matrix/detail/gather_inplace.cuh | 3 +++ .../neighbors/detail/connect_components.cuh | 17 ++++---------- 5 files changed, 33 insertions(+), 14 deletions(-) create mode 100644 cpp/cmake/thirdparty/get_libcudacxx.cmake diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6fa1b5830e..c68ec695e2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -158,6 +158,8 @@ include(cmake/thirdparty/get_cutlass.cmake) include(${rapids-cmake-dir}/cpm/cuco.cmake) rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports) +include(cmake/thirdparty/get_libcudacxx.cmake) + if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) endif() diff --git a/cpp/cmake/thirdparty/get_libcudacxx.cmake b/cpp/cmake/thirdparty/get_libcudacxx.cmake new file mode 100644 index 0000000000..10663021f4 --- /dev/null +++ b/cpp/cmake/thirdparty/get_libcudacxx.cmake @@ -0,0 +1,23 @@ +#============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_libcudacxx) + include(${rapids-cmake-dir}/cpm/libcudacxx.cmake) + rapids_cpm_libcudacxx(BUILD_EXPORT_SET raft-exports + INSTALL_EXPORT_SET raft-exports) +endfunction() + +find_and_configure_libcudacxx() \ No newline at end of file diff --git a/cpp/include/raft/cluster/detail/single_linkage.cuh b/cpp/include/raft/cluster/detail/single_linkage.cuh index ddd422a89b..848ca0357e 100644 --- a/cpp/include/raft/cluster/detail/single_linkage.cuh +++ b/cpp/include/raft/cluster/detail/single_linkage.cuh @@ -81,7 +81,7 @@ void single_linkage(raft::resources const& handle, * 2. Construct MST, sorted by weights */ rmm::device_uvector color(m, stream); - raft::sparse::neighbors::FixConnectivitiesRedOp op(color.data(), m); + raft::sparse::neighbors::FixConnectivitiesRedOp op(m); detail::build_sorted_mst(handle, X, indptr.data(), diff --git a/cpp/include/raft/matrix/detail/gather_inplace.cuh b/cpp/include/raft/matrix/detail/gather_inplace.cuh index 2869080205..68c990c58d 100644 --- a/cpp/include/raft/matrix/detail/gather_inplace.cuh +++ b/cpp/include/raft/matrix/detail/gather_inplace.cuh @@ -21,6 +21,9 @@ #include #include +#include +#include + namespace raft { namespace matrix { namespace detail { diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index a9e71cfb08..5ed727650f 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -64,18 +64,17 @@ namespace raft::sparse::neighbors::detail { */ template struct FixConnectivitiesRedOp { - value_idx* colors; value_idx m; // default constructor for cutlass - DI FixConnectivitiesRedOp() : colors(0), m(0) {} + DI FixConnectivitiesRedOp() : m(0) {} - FixConnectivitiesRedOp(value_idx* colors_, value_idx m_) : colors(colors_), m(m_){}; + FixConnectivitiesRedOp(value_idx m_) : m(m_){}; typedef typename raft::KeyValuePair KVP; DI void operator()(value_idx rit, KVP* out, const KVP& other) const { - if (rit < m && other.value < out->value && colors[rit] != colors[other.key]) { + if (rit < m && other.value < out->value) { out->key = other.key; out->value = other.value; } @@ -83,7 +82,7 @@ struct FixConnectivitiesRedOp { DI KVP operator()(value_idx rit, const KVP& a, const KVP& b) const { - if (rit < m && a.value < b.value && colors[rit] != colors[a.key]) { + if (rit < m && a.value < b.value) { return a; } else return b; @@ -108,18 +107,10 @@ struct FixConnectivitiesRedOp { // consistent after rearranging. void gather(const raft::resources& handle, value_idx* map) { - auto tmp_colors = raft::make_device_vector(handle, m); - thrust::gather( - raft::resource::get_thrust_policy(handle), map, map + m, colors, tmp_colors.data_handle()); - raft::copy_async(colors, tmp_colors.data_handle(), m, raft::resource::get_cuda_stream(handle)); } void scatter(const raft::resources& handle, value_idx* map) { - auto tmp_colors = raft::make_device_vector(handle, m); - thrust::scatter( - raft::resource::get_thrust_policy(handle), colors, colors + m, map, tmp_colors.data_handle()); - raft::copy_async(colors, tmp_colors.data_handle(), m, raft::resource::get_cuda_stream(handle)); } }; From e3121c5ba6717bbfd6551e08906356117de96669 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Tue, 20 Jun 2023 17:24:30 -0700 Subject: [PATCH 36/53] Update with libcudacxx type_traits header --- cpp/include/raft/matrix/detail/gather.cuh | 7 +++++-- cpp/include/raft/matrix/detail/gather_inplace.cuh | 3 +-- .../raft/sparse/neighbors/detail/connect_components.cuh | 8 ++------ cpp/test/sparse/neighbors/connect_components.cu | 2 +- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index fc358b4807..a166b1f1a4 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -19,6 +19,8 @@ #include #include +#include + namespace raft { namespace matrix { namespace detail { @@ -136,12 +138,13 @@ void gatherImpl(const InputIteratorT in, typedef typename std::iterator_traits::value_type StencilValueT; // return type of MapTransformOp, must be convertible to IndexT - typedef typename std::result_of::type MapTransformOpReturnT; + typedef + typename cuda::std::result_of::type MapTransformOpReturnT; static_assert((std::is_convertible::value), "MapTransformOp's result type must be convertible to signed integer"); // return type of UnaryPredicateOp, must be convertible to bool - typedef typename std::result_of::type PredicateOpReturnT; + typedef typename cuda::std::result_of::type PredicateOpReturnT; static_assert((std::is_convertible::value), "UnaryPredicateOp's result type must be convertible to bool type"); diff --git a/cpp/include/raft/matrix/detail/gather_inplace.cuh b/cpp/include/raft/matrix/detail/gather_inplace.cuh index 68c990c58d..4e394b49b3 100644 --- a/cpp/include/raft/matrix/detail/gather_inplace.cuh +++ b/cpp/include/raft/matrix/detail/gather_inplace.cuh @@ -22,7 +22,6 @@ #include #include -#include namespace raft { namespace matrix { @@ -39,7 +38,7 @@ void gatherInplaceImpl(raft::resources const& handle, // TODO (tarang-jain): Use cuda::std::result_of here to ensure that the return type of a // device function is being correctly obtained. Reference: // https://github.com/rapidsai/raft/pull/1445. - typedef typename std::result_of::type MapTransformOpReturnT; + typedef typename cuda::std::result_of::type MapTransformOpReturnT; RAFT_EXPECTS((std::is_convertible::value), "MapTransformOp's result type must be convertible to signed integer"); diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 5ed727650f..72aa16434a 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -105,13 +105,9 @@ struct FixConnectivitiesRedOp { // Gather and scatter are necessary because this functor is used in connect_components, which // rearranges the data internally. The gather and scatter ensure that operator() is still // consistent after rearranging. - void gather(const raft::resources& handle, value_idx* map) - { - } + void gather(const raft::resources& handle, value_idx* map) {} - void scatter(const raft::resources& handle, value_idx* map) - { - } + void scatter(const raft::resources& handle, value_idx* map) {} }; /** diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index c61c6b0755..a7afd25e04 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -117,7 +117,7 @@ class ConnectComponentsTest /** * 3. connect_components to fix connectivities */ - raft::linkage::FixConnectivitiesRedOp red_op(colors.data(), params.n_row); + raft::linkage::FixConnectivitiesRedOp red_op(params.n_row); raft::linkage::connect_components(handle, out_edges, data.data(), From d9293785dcf3018acaafda8fc482a0794b242940 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Tue, 20 Jun 2023 17:42:20 -0700 Subject: [PATCH 37/53] Update todo --- cpp/test/sparse/neighbors/connect_components.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index a7afd25e04..8d12547f54 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -14,13 +14,15 @@ * limitations under the License. */ -// XXX: We allow the instantiation of fused_l2_nn here: -// raft::linkage::FixConnectivitiesRedOp red_op(colors.data(), params.n_row); +// XXX: We allow the instantiation of masked_l2_nn here: +// raft::linkage::FixConnectivitiesRedOp red_op(params.n_row); // raft::linkage::connect_components( // handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, red_op); // // TODO: consider adding this to libraft.so or creating an instance in a // separate translation unit for this test. +// +// TODO: edge case testing #undef RAFT_EXPLICIT_INSTANTIATE_ONLY #include From a0169c39ca60942265671ca6c58a23b90e6bef3c Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Tue, 20 Jun 2023 19:07:42 -0700 Subject: [PATCH 38/53] add proclaim_return_type to predicate --- cpp/include/raft/matrix/detail/gather.cuh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index a166b1f1a4..08d57db5fa 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -16,9 +16,11 @@ #pragma once +#include #include #include +#include #include namespace raft { @@ -144,7 +146,8 @@ void gatherImpl(const InputIteratorT in, "MapTransformOp's result type must be convertible to signed integer"); // return type of UnaryPredicateOp, must be convertible to bool - typedef typename cuda::std::result_of::type PredicateOpReturnT; + typedef typename cuda::std::result_of(pred_op))( + StencilValueT)>::type PredicateOpReturnT; static_assert((std::is_convertible::value), "UnaryPredicateOp's result type must be convertible to bool type"); From 69e393af28019ac05c6cb77884ef136f92044285 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Thu, 22 Jun 2023 11:21:04 -0700 Subject: [PATCH 39/53] remove libcudacxx dependency and rename api --- cpp/CMakeLists.txt | 2 - cpp/cmake/thirdparty/get_libcudacxx.cmake | 23 ---- cpp/include/raft/cluster/detail/mst.cuh | 23 ++-- cpp/include/raft/matrix/detail/gather.cuh | 15 --- .../raft/matrix/detail/gather_inplace.cuh | 10 -- .../sparse/neighbors/connect_components.cuh | 32 ++++-- .../neighbors/detail/connect_components.cuh | 101 ++++++++++-------- .../sparse/selection/connect_components.cuh | 2 +- cpp/test/cluster/linkage.cu | 6 +- .../sparse/neighbors/connect_components.cu | 82 +++++++------- 10 files changed, 139 insertions(+), 157 deletions(-) delete mode 100644 cpp/cmake/thirdparty/get_libcudacxx.cmake diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c68ec695e2..6fa1b5830e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -158,8 +158,6 @@ include(cmake/thirdparty/get_cutlass.cmake) include(${rapids-cmake-dir}/cpm/cuco.cmake) rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports) -include(cmake/thirdparty/get_libcudacxx.cmake) - if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) endif() diff --git a/cpp/cmake/thirdparty/get_libcudacxx.cmake b/cpp/cmake/thirdparty/get_libcudacxx.cmake deleted file mode 100644 index 10663021f4..0000000000 --- a/cpp/cmake/thirdparty/get_libcudacxx.cmake +++ /dev/null @@ -1,23 +0,0 @@ -#============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -#============================================================================= - -function(find_and_configure_libcudacxx) - include(${rapids-cmake-dir}/cpm/libcudacxx.cmake) - rapids_cpm_libcudacxx(BUILD_EXPORT_SET raft-exports - INSTALL_EXPORT_SET raft-exports) -endfunction() - -find_and_configure_libcudacxx() \ No newline at end of file diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh index 8ee0a76598..7cfc55a4b6 100644 --- a/cpp/include/raft/cluster/detail/mst.cuh +++ b/cpp/include/raft/cluster/detail/mst.cuh @@ -81,15 +81,20 @@ void connect_knn_graph( raft::sparse::COO connected_edges(stream); - raft::sparse::neighbors::connect_components(handle, - connected_edges, - X, - color, - m, - n, - reduction_op, - min(m, (size_t)4096), - min(n, (size_t)16)); + // default row and column batch sizes are chosen for computing cross component nearest neighbors. + // Reference: PR #1445 + static constexpr size_t default_row_batch_size = 4096; + static constexpr size_t default_col_batch_size = 16; + + raft::sparse::neighbors::cross_component_1nn(handle, + connected_edges, + X, + color, + m, + n, + reduction_op, + min(m, default_row_batch_size), + min(n, default_col_batch_size)); rmm::device_uvector indptr2(m + 1, stream); raft::sparse::convert::sorted_coo_to_csr( diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 08d57db5fa..59fcf606c8 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -20,9 +20,6 @@ #include #include -#include -#include - namespace raft { namespace matrix { namespace detail { @@ -139,18 +136,6 @@ void gatherImpl(const InputIteratorT in, // stencil value type typedef typename std::iterator_traits::value_type StencilValueT; - // return type of MapTransformOp, must be convertible to IndexT - typedef - typename cuda::std::result_of::type MapTransformOpReturnT; - static_assert((std::is_convertible::value), - "MapTransformOp's result type must be convertible to signed integer"); - - // return type of UnaryPredicateOp, must be convertible to bool - typedef typename cuda::std::result_of(pred_op))( - StencilValueT)>::type PredicateOpReturnT; - static_assert((std::is_convertible::value), - "UnaryPredicateOp's result type must be convertible to bool type"); - IndexT len = map_length * D; constexpr int TPB = 128; const int n_sm = raft::getMultiProcessorCount(); diff --git a/cpp/include/raft/matrix/detail/gather_inplace.cuh b/cpp/include/raft/matrix/detail/gather_inplace.cuh index 4e394b49b3..cc510e068b 100644 --- a/cpp/include/raft/matrix/detail/gather_inplace.cuh +++ b/cpp/include/raft/matrix/detail/gather_inplace.cuh @@ -21,8 +21,6 @@ #include #include -#include - namespace raft { namespace matrix { namespace detail { @@ -34,14 +32,6 @@ void gatherInplaceImpl(raft::resources const& handle, MapTransformOp transform_op, IndexT batch_size) { - // return type of MapTransformOp, must be convertible to IndexT - // TODO (tarang-jain): Use cuda::std::result_of here to ensure that the return type of a - // device function is being correctly obtained. Reference: - // https://github.com/rapidsai/raft/pull/1445. - typedef typename cuda::std::result_of::type MapTransformOpReturnT; - RAFT_EXPECTS((std::is_convertible::value), - "MapTransformOp's result type must be convertible to signed integer"); - IndexT m = inout.extent(0); IndexT n = inout.extent(1); IndexT map_length = map.extent(0); diff --git a/cpp/include/raft/sparse/neighbors/connect_components.cuh b/cpp/include/raft/sparse/neighbors/connect_components.cuh index 6a935ae8ff..30c5cd3fd3 100644 --- a/cpp/include/raft/sparse/neighbors/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/connect_components.cuh @@ -72,18 +72,28 @@ value_idx get_n_components(value_idx* colors, size_t n_rows, cudaStream_t stream * increase in compute time as the col_batch_size is reduced */ template -void connect_components(raft::resources const& handle, - raft::sparse::COO& out, - const value_t* X, - const value_idx* orig_colors, - size_t n_rows, - size_t n_cols, - red_op reduction_op, - size_t row_batch_size = 0, - size_t col_batch_size = 0) +void cross_component_1nn( + raft::resources const& handle, + raft::sparse::COO& out, + const value_t* X, + const value_idx* orig_colors, + size_t n_rows, + size_t n_cols, + red_op reduction_op, + size_t row_batch_size = 0, + size_t col_batch_size = 0, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded) { - detail::connect_components( - handle, out, X, orig_colors, n_rows, n_cols, reduction_op, row_batch_size, col_batch_size); + detail::cross_component_1nn(handle, + out, + X, + orig_colors, + n_rows, + n_cols, + reduction_op, + row_batch_size, + col_batch_size, + metric); } }; // end namespace raft::sparse::neighbors \ No newline at end of file diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 72aa16434a..df2263b619 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -17,12 +17,12 @@ #include #include -#include -#include - #include #include #include +#include +#include +#include #include #include #include @@ -56,6 +56,39 @@ namespace raft::sparse::neighbors::detail { +/** + * Base functor with reduction ops for performing masked 1-nn + * computation. + * @tparam value_idx + * @tparam value_t + */ +template +struct FixConnectivitiesRedOpBase { + typedef typename raft::KeyValuePair KVP; + DI void operator()(value_idx rit, KVP* out, const KVP& other) const + { + out->key = other.key; + out->value = other.value; + } + + DI KVP operator()(value_idx rit, const KVP& a, const KVP& b) const { return b; } + + DI void init(value_t* out, value_t maxVal) const {} + DI void init(KVP* out, value_t maxVal) const {} + + DI void init_key(value_t& out, value_idx idx) const { return; } + DI void init_key(KVP& out, value_idx idx) const {} + + DI value_t get_value(KVP& out) const { return out.value; } + + DI value_t get_value(value_t& out) const { return out; } + + // The gather and scatter ensure that operator() is still consistent after rearranging. + void gather(const raft::resources& handle, value_idx* map) {} + + void scatter(const raft::resources& handle, value_idx* map) {} +}; + /** * Functor with reduction ops for performing masked 1-nn * computation. @@ -63,7 +96,7 @@ namespace raft::sparse::neighbors::detail { * @tparam value_t */ template -struct FixConnectivitiesRedOp { +struct FixConnectivitiesRedOp : public FixConnectivitiesRedOpBase { value_idx m; // default constructor for cutlass @@ -87,27 +120,6 @@ struct FixConnectivitiesRedOp { } else return b; } - - DI void init(value_t* out, value_t maxVal) const { *out = maxVal; } - DI void init(KVP* out, value_t maxVal) const - { - out->key = -1; - out->value = maxVal; - } - - DI void init_key(value_t& out, value_idx idx) const { return; } - DI void init_key(KVP& out, value_idx idx) const { out.key = idx; } - - DI value_t get_value(KVP& out) const { return out.value; } - - DI value_t get_value(value_t& out) const { return out; } - - // Gather and scatter are necessary because this functor is used in connect_components, which - // rearranges the data internally. The gather and scatter ensure that operator() is still - // consistent after rearranging. - void gather(const raft::resources& handle, value_idx* map) {} - - void scatter(const raft::resources& handle, value_idx* map) {} }; /** @@ -227,7 +239,7 @@ void perform_1nn(raft::resources const& handle, thrust::sort_by_key( resource::get_thrust_policy(handle), colors, colors + n_rows, sort_plan.data_handle()); - // Modify the reduction operation based on the sort plan. This is particularly needed for HDBSCAN + // Modify the reduction operation based on the sort plan. reduction_op.gather(handle, sort_plan.data_handle()); auto X_mutable_view = @@ -439,8 +451,7 @@ void min_components_by_color(raft::sparse::COO& coo, * @param[in] n_rows number of rows in X * @param[in] n_cols number of cols in X * @param[in] reduction_op reduction operation for computing nearest neighbors. The reduction - * operation must have `gather` and `scatter` functions defined. For single linkage clustering, - * these functions are no-ops. For HDBSCAN, they sort and 'unsort' the core distances based on color + * operation must have `gather` and `scatter` functions defined * @param[in] row_batch_size the batch size for computing nearest neighbors. This parameter controls * the number of samples for which the nearest neighbors are computed at once. Therefore, it affects * the memory consumption mainly by reducing the size of the adjacency matrix for masked nearest @@ -452,26 +463,32 @@ void min_components_by_color(raft::sparse::COO& coo, * is done */ template -void connect_components(raft::resources const& handle, - raft::sparse::COO& out, - const value_t* X, - const value_idx* orig_colors, - size_t n_rows, - size_t n_cols, - red_op reduction_op, - size_t row_batch_size, - size_t col_batch_size) +void cross_component_1nn( + raft::resources const& handle, + raft::sparse::COO& out, + const value_t* X, + const value_idx* orig_colors, + size_t n_rows, + size_t n_cols, + red_op reduction_op, + size_t row_batch_size, + size_t col_batch_size, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded) { - RAFT_EXPECTS(col_batch_size <= n_cols, "col_batch_size should be >= 0 and <= n_cols"); - RAFT_EXPECTS(row_batch_size <= n_rows, "row_batch_size should be >= 0 and <= n_rows"); - if (row_batch_size == 0) { row_batch_size = n_rows; } - if (col_batch_size == 0) { col_batch_size = n_cols; } auto stream = resource::get_cuda_stream(handle); + RAFT_EXPECTS(metric == raft::distance::DistanceType::L2SqrtExpanded, + "Fixing connectivities for an unconnected k-NN graph only " + "supports L2SqrtExpanded currently."); + + if (row_batch_size == 0 || row_batch_size > n_rows) { row_batch_size = n_rows; } + + if (col_batch_size == 0 || col_batch_size > n_cols) { col_batch_size = n_cols; } + rmm::device_uvector colors(n_rows, stream); // Normalize colors so they are drawn from a monotonically increasing set - bool zero_based = true; + constexpr bool zero_based = true; raft::label::make_monotonic( colors.data(), const_cast(orig_colors), n_rows, stream, zero_based); diff --git a/cpp/include/raft/sparse/selection/connect_components.cuh b/cpp/include/raft/sparse/selection/connect_components.cuh index 9bc3f1553a..ee8d61332d 100644 --- a/cpp/include/raft/sparse/selection/connect_components.cuh +++ b/cpp/include/raft/sparse/selection/connect_components.cuh @@ -31,7 +31,7 @@ #include namespace raft::linkage { -using raft::sparse::neighbors::connect_components; +using raft::sparse::neighbors::cross_component_1nn; using raft::sparse::neighbors::FixConnectivitiesRedOp; using raft::sparse::neighbors::get_n_components; } // namespace raft::linkage \ No newline at end of file diff --git a/cpp/test/cluster/linkage.cu b/cpp/test/cluster/linkage.cu index e660dbef13..3582150e21 100644 --- a/cpp/test/cluster/linkage.cu +++ b/cpp/test/cluster/linkage.cu @@ -14,9 +14,9 @@ * limitations under the License. */ -// XXX: We allow the instantiation of fused_l2_nn here: -// raft::linkage::FixConnectivitiesRedOp red_op(colors.data(), params.n_row); -// raft::linkage::connect_components( +// XXX: We allow the instantiation of masked_l2_nn here: +// raft::linkage::FixConnectivitiesRedOp red_op(params.n_row); +// raft::linkage::cross_component_1nn( // handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, red_op); // // TODO: consider adding this to libraft.so or creating an instance in a diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index 8d12547f54..788db3a4f2 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -16,7 +16,7 @@ // XXX: We allow the instantiation of masked_l2_nn here: // raft::linkage::FixConnectivitiesRedOp red_op(params.n_row); -// raft::linkage::connect_components( +// raft::linkage::cross_component_1nn( // handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, red_op); // // TODO: consider adding this to libraft.so or creating an instance in a @@ -117,28 +117,28 @@ class ConnectComponentsTest true); /** - * 3. connect_components to fix connectivities + * 3. cross_component_1nn to fix connectivities */ raft::linkage::FixConnectivitiesRedOp red_op(params.n_row); - raft::linkage::connect_components(handle, - out_edges, - data.data(), - colors.data(), - params.n_row, - params.n_col, - red_op, - params.n_row, - params.n_col); - - raft::linkage::connect_components(handle, - out_edges_batched, - data.data(), - colors.data(), - params.n_row, - params.n_col, - red_op, - params.n_row / 2, - params.n_col / 2); + raft::linkage::cross_component_1nn(handle, + out_edges, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + params.n_row, + params.n_col); + + raft::linkage::cross_component_1nn(handle, + out_edges_batched, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + params.n_row / 2, + params.n_col / 2); ASSERT_TRUE(out_edges.nnz == out_edges_batched.nnz); @@ -514,30 +514,30 @@ class ConnectComponentsEdgesTest colors.data(), params.colors.data(), colors.size(), resource::get_cuda_stream(handle)); /** - * 3. connect_components to fix connectivities + * 3. cross_component_1nn to fix connectivities */ MutualReachabilityFixConnectivitiesRedOp red_op(core_dists.data(), params.n_row); - raft::linkage::connect_components(handle, - out_edges_unbatched, - data.data(), - colors.data(), - params.n_row, - params.n_col, - red_op, - params.n_row, - params.n_col); - - raft::linkage::connect_components(handle, - out_edges_batched, - data.data(), - colors.data(), - params.n_row, - params.n_col, - red_op, - 11, - 1); + raft::linkage::cross_component_1nn(handle, + out_edges_unbatched, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + params.n_row, + params.n_col); + + raft::linkage::cross_component_1nn(handle, + out_edges_batched, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + 11, + 1); ASSERT_TRUE(out_edges_unbatched.nnz == out_edges_batched.nnz && out_edges_unbatched.nnz == params.expected_rows.size()); From d95350f0d8412d0355cea6f802d4c9db7be37a5a Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 23 Jun 2023 10:18:43 -0700 Subject: [PATCH 40/53] updates --- cpp/bench/prims/distance/masked_nn.cu | 1 - cpp/include/raft/cluster/detail/mst.cuh | 18 ++-- .../sparse/neighbors/connect_components.cuh | 22 ++--- .../neighbors/detail/connect_components.cuh | 58 +++++-------- .../sparse/selection/connect_components.cuh | 2 +- cpp/test/cluster/linkage.cu | 2 +- .../sparse/neighbors/connect_components.cu | 82 +++++++++---------- 7 files changed, 84 insertions(+), 101 deletions(-) diff --git a/cpp/bench/prims/distance/masked_nn.cu b/cpp/bench/prims/distance/masked_nn.cu index 5f63fa4779..c804ecb3a1 100644 --- a/cpp/bench/prims/distance/masked_nn.cu +++ b/cpp/bench/prims/distance/masked_nn.cu @@ -128,7 +128,6 @@ struct masked_l2_nn : public fixture { dim3 block(32, 32); dim3 grid(10, 10); init_adj<<>>(p.pattern, p.n, adj.view(), group_idxs.view()); - RAFT_CUDA_TRY(cudaGetLastError()); } diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh index 7cfc55a4b6..31427fecfe 100644 --- a/cpp/include/raft/cluster/detail/mst.cuh +++ b/cpp/include/raft/cluster/detail/mst.cuh @@ -86,15 +86,15 @@ void connect_knn_graph( static constexpr size_t default_row_batch_size = 4096; static constexpr size_t default_col_batch_size = 16; - raft::sparse::neighbors::cross_component_1nn(handle, - connected_edges, - X, - color, - m, - n, - reduction_op, - min(m, default_row_batch_size), - min(n, default_col_batch_size)); + raft::sparse::neighbors::cross_component_nn(handle, + connected_edges, + X, + color, + m, + n, + reduction_op, + min(m, default_row_batch_size), + min(n, default_col_batch_size)); rmm::device_uvector indptr2(m + 1, stream); raft::sparse::convert::sorted_coo_to_csr( diff --git a/cpp/include/raft/sparse/neighbors/connect_components.cuh b/cpp/include/raft/sparse/neighbors/connect_components.cuh index 30c5cd3fd3..2e38dd03ea 100644 --- a/cpp/include/raft/sparse/neighbors/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/connect_components.cuh @@ -72,7 +72,7 @@ value_idx get_n_components(value_idx* colors, size_t n_rows, cudaStream_t stream * increase in compute time as the col_batch_size is reduced */ template -void cross_component_1nn( +void cross_component_nn( raft::resources const& handle, raft::sparse::COO& out, const value_t* X, @@ -84,16 +84,16 @@ void cross_component_1nn( size_t col_batch_size = 0, raft::distance::DistanceType metric = raft::distance::DistanceType::L2SqrtExpanded) { - detail::cross_component_1nn(handle, - out, - X, - orig_colors, - n_rows, - n_cols, - reduction_op, - row_batch_size, - col_batch_size, - metric); + detail::cross_component_nn(handle, + out, + X, + orig_colors, + n_rows, + n_cols, + reduction_op, + row_batch_size, + col_batch_size, + metric); } }; // end namespace raft::sparse::neighbors \ No newline at end of file diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index df2263b619..293401c7e0 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -63,41 +63,8 @@ namespace raft::sparse::neighbors::detail { * @tparam value_t */ template -struct FixConnectivitiesRedOpBase { - typedef typename raft::KeyValuePair KVP; - DI void operator()(value_idx rit, KVP* out, const KVP& other) const - { - out->key = other.key; - out->value = other.value; - } - - DI KVP operator()(value_idx rit, const KVP& a, const KVP& b) const { return b; } - - DI void init(value_t* out, value_t maxVal) const {} - DI void init(KVP* out, value_t maxVal) const {} - - DI void init_key(value_t& out, value_idx idx) const { return; } - DI void init_key(KVP& out, value_idx idx) const {} - - DI value_t get_value(KVP& out) const { return out.value; } - - DI value_t get_value(value_t& out) const { return out; } - - // The gather and scatter ensure that operator() is still consistent after rearranging. - void gather(const raft::resources& handle, value_idx* map) {} - - void scatter(const raft::resources& handle, value_idx* map) {} -}; - -/** - * Functor with reduction ops for performing masked 1-nn - * computation. - * @tparam value_idx - * @tparam value_t - */ -template -struct FixConnectivitiesRedOp : public FixConnectivitiesRedOpBase { - value_idx m; +struct FixConnectivitiesRedOp { + value_idx m; // default constructor for cutlass DI FixConnectivitiesRedOp() : m(0) {} @@ -120,6 +87,23 @@ struct FixConnectivitiesRedOp : public FixConnectivitiesRedOpBase& coo, * is done */ template -void cross_component_1nn( +void cross_component_nn( raft::resources const& handle, raft::sparse::COO& out, const value_t* X, diff --git a/cpp/include/raft/sparse/selection/connect_components.cuh b/cpp/include/raft/sparse/selection/connect_components.cuh index ee8d61332d..5a4908c8b8 100644 --- a/cpp/include/raft/sparse/selection/connect_components.cuh +++ b/cpp/include/raft/sparse/selection/connect_components.cuh @@ -31,7 +31,7 @@ #include namespace raft::linkage { -using raft::sparse::neighbors::cross_component_1nn; +using raft::sparse::neighbors::cross_component_nn; using raft::sparse::neighbors::FixConnectivitiesRedOp; using raft::sparse::neighbors::get_n_components; } // namespace raft::linkage \ No newline at end of file diff --git a/cpp/test/cluster/linkage.cu b/cpp/test/cluster/linkage.cu index 3582150e21..52ec2efe8e 100644 --- a/cpp/test/cluster/linkage.cu +++ b/cpp/test/cluster/linkage.cu @@ -16,7 +16,7 @@ // XXX: We allow the instantiation of masked_l2_nn here: // raft::linkage::FixConnectivitiesRedOp red_op(params.n_row); -// raft::linkage::cross_component_1nn( +// raft::linkage::cross_component_nn( // handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, red_op); // // TODO: consider adding this to libraft.so or creating an instance in a diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index 788db3a4f2..4055969b65 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -16,7 +16,7 @@ // XXX: We allow the instantiation of masked_l2_nn here: // raft::linkage::FixConnectivitiesRedOp red_op(params.n_row); -// raft::linkage::cross_component_1nn( +// raft::linkage::cross_component_nn( // handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col, red_op); // // TODO: consider adding this to libraft.so or creating an instance in a @@ -117,28 +117,28 @@ class ConnectComponentsTest true); /** - * 3. cross_component_1nn to fix connectivities + * 3. cross_component_nn to fix connectivities */ raft::linkage::FixConnectivitiesRedOp red_op(params.n_row); - raft::linkage::cross_component_1nn(handle, - out_edges, - data.data(), - colors.data(), - params.n_row, - params.n_col, - red_op, - params.n_row, - params.n_col); - - raft::linkage::cross_component_1nn(handle, - out_edges_batched, - data.data(), - colors.data(), - params.n_row, - params.n_col, - red_op, - params.n_row / 2, - params.n_col / 2); + raft::linkage::cross_component_nn(handle, + out_edges, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + params.n_row, + params.n_col); + + raft::linkage::cross_component_nn(handle, + out_edges_batched, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + params.n_row / 2, + params.n_col / 2); ASSERT_TRUE(out_edges.nnz == out_edges_batched.nnz); @@ -514,30 +514,30 @@ class ConnectComponentsEdgesTest colors.data(), params.colors.data(), colors.size(), resource::get_cuda_stream(handle)); /** - * 3. cross_component_1nn to fix connectivities + * 3. cross_component_nn to fix connectivities */ MutualReachabilityFixConnectivitiesRedOp red_op(core_dists.data(), params.n_row); - raft::linkage::cross_component_1nn(handle, - out_edges_unbatched, - data.data(), - colors.data(), - params.n_row, - params.n_col, - red_op, - params.n_row, - params.n_col); - - raft::linkage::cross_component_1nn(handle, - out_edges_batched, - data.data(), - colors.data(), - params.n_row, - params.n_col, - red_op, - 11, - 1); + raft::linkage::cross_component_nn(handle, + out_edges_unbatched, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + params.n_row, + params.n_col); + + raft::linkage::cross_component_nn(handle, + out_edges_batched, + data.data(), + colors.data(), + params.n_row, + params.n_col, + red_op, + 11, + 1); ASSERT_TRUE(out_edges_unbatched.nnz == out_edges_batched.nnz && out_edges_unbatched.nnz == params.expected_rows.size()); From 2a0a4912c4a391c5972d89af33035902534a0098 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 23 Jun 2023 11:41:40 -0700 Subject: [PATCH 41/53] dbg --- .../raft/sparse/neighbors/detail/connect_components.cuh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 293401c7e0..426fd1b0de 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -101,9 +101,9 @@ struct FixConnectivitiesRedOp { /** The gather and scatter ensure that operator() is still consistent after rearranging the data. * TODO (tarang-jain): refactor cross_component_nn API to separate out the gather and scatter * functions from the reduction op. */ - virtual void gather(const raft::resources& handle, value_idx* map) {} + void gather(const raft::resources& handle, value_idx* map) {} - virtual void scatter(const raft::resources& handle, value_idx* map) {} + void scatter(const raft::resources& handle, value_idx* map) {} }; /** @@ -301,6 +301,7 @@ void perform_1nn(raft::resources const& handle, adj_view, group_idxs_view, kvp_view); + RAFT_LOG_INFO("l2_nn done"); } // Transform the keys so that they correctly point to the unpermuted indices. @@ -475,6 +476,8 @@ void cross_component_nn( constexpr bool zero_based = true; raft::label::make_monotonic( colors.data(), const_cast(orig_colors), n_rows, stream, zero_based); + + raft::print_device_vector("colors", colors.data(), n_rows, std::cout); /** * First compute 1-nn for all colors where the color of each data point @@ -494,6 +497,8 @@ void cross_component_nn( row_batch_size, col_batch_size, reduction_op); + + raft::print_device_vector("nn_colros", nn_colors.data(), n_rows, std::cout); /** * Sort data points by color (neighbors are not sorted) From 6ef2b92bc75164b8a1bd4d78fa806db12736de1a Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 23 Jun 2023 12:09:48 -0700 Subject: [PATCH 42/53] fix failing test --- .../neighbors/detail/connect_components.cuh | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 426fd1b0de..a51e896ce7 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -64,7 +64,7 @@ namespace raft::sparse::neighbors::detail { */ template struct FixConnectivitiesRedOp { - value_idx m; + value_idx m; // default constructor for cutlass DI FixConnectivitiesRedOp() : m(0) {} @@ -88,11 +88,15 @@ struct FixConnectivitiesRedOp { return b; } - DI void init(value_t* out, value_t maxVal) const {} - DI void init(KVP* out, value_t maxVal) const {} + DI void init(value_t* out, value_t maxVal) const { *out = maxVal; } + DI void init(KVP* out, value_t maxVal) const + { + out->key = -1; + out->value = maxVal; + } DI void init_key(value_t& out, value_idx idx) const { return; } - DI void init_key(KVP& out, value_idx idx) const {} + DI void init_key(KVP& out, value_idx idx) const { out.key = idx; } DI value_t get_value(KVP& out) const { return out.value; } @@ -222,7 +226,7 @@ void perform_1nn(raft::resources const& handle, thrust::sort_by_key( resource::get_thrust_policy(handle), colors, colors + n_rows, sort_plan.data_handle()); - + // Modify the reduction operation based on the sort plan. reduction_op.gather(handle, sort_plan.data_handle()); @@ -301,7 +305,6 @@ void perform_1nn(raft::resources const& handle, adj_view, group_idxs_view, kvp_view); - RAFT_LOG_INFO("l2_nn done"); } // Transform the keys so that they correctly point to the unpermuted indices. @@ -476,8 +479,6 @@ void cross_component_nn( constexpr bool zero_based = true; raft::label::make_monotonic( colors.data(), const_cast(orig_colors), n_rows, stream, zero_based); - - raft::print_device_vector("colors", colors.data(), n_rows, std::cout); /** * First compute 1-nn for all colors where the color of each data point @@ -497,8 +498,6 @@ void cross_component_nn( row_batch_size, col_batch_size, reduction_op); - - raft::print_device_vector("nn_colros", nn_colors.data(), n_rows, std::cout); /** * Sort data points by color (neighbors are not sorted) From cad9b0efddd1d0b747f5fc95ba767b6c0a1da0b5 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 23 Jun 2023 12:18:58 -0700 Subject: [PATCH 43/53] Link issue --- cpp/include/raft/sparse/neighbors/detail/connect_components.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index a51e896ce7..2664e986c6 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -104,7 +104,7 @@ struct FixConnectivitiesRedOp { /** The gather and scatter ensure that operator() is still consistent after rearranging the data. * TODO (tarang-jain): refactor cross_component_nn API to separate out the gather and scatter - * functions from the reduction op. */ + * functions from the reduction op. Reference: https://github.com/rapidsai/raft/issues/1614 */ void gather(const raft::resources& handle, value_idx* map) {} void scatter(const raft::resources& handle, value_idx* map) {} From c02d67be0bb23ccd7150197c37ba0a38ddd87c49 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 23 Jun 2023 15:28:53 -0700 Subject: [PATCH 44/53] fix docs --- cpp/include/raft/sparse/neighbors/connect_components.cuh | 1 + cpp/include/raft/sparse/neighbors/detail/connect_components.cuh | 1 + 2 files changed, 2 insertions(+) diff --git a/cpp/include/raft/sparse/neighbors/connect_components.cuh b/cpp/include/raft/sparse/neighbors/connect_components.cuh index 2e38dd03ea..ec2e9482b1 100644 --- a/cpp/include/raft/sparse/neighbors/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/connect_components.cuh @@ -70,6 +70,7 @@ value_idx get_n_components(value_idx* colors, size_t n_rows, cudaStream_t stream * scratch space buffer of shape (n_rows, col_batch_size) is created for this. Usually, this * parameter affects the memory consumption more drastically than the row_batch_size with a marginal * increase in compute time as the col_batch_size is reduced + * @param[in] metric distance metric */ template void cross_component_nn( diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh index 2664e986c6..3570be2b5c 100644 --- a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh @@ -449,6 +449,7 @@ void min_components_by_color(raft::sparse::COO& coo, * parameter affects the memory consumption more drastically than the col_batch_size with a marginal * increase in compute time as the col_batch_size is reduced. default 0 indicates that no batching * is done + * @param[in] metric distance metric */ template void cross_component_nn( From f714918967f8362cde77fdb479b105a8480cca61 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 17 Jul 2023 10:12:47 -0700 Subject: [PATCH 45/53] doc fix --- cpp/include/raft/sparse/neighbors/connect_components.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/include/raft/sparse/neighbors/connect_components.cuh b/cpp/include/raft/sparse/neighbors/connect_components.cuh index ec2e9482b1..399b67d5a7 100644 --- a/cpp/include/raft/sparse/neighbors/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/connect_components.cuh @@ -60,8 +60,7 @@ value_idx get_n_components(value_idx* colors, size_t n_rows, cudaStream_t stream * @param[in] n_rows number of rows in X * @param[in] n_cols number of cols in X * @param[in] reduction_op reduction operation for computing nearest neighbors. The reduction - * operation must have `gather` and `scatter` functions defined. For single linkage clustering, - * these functions are no-ops. For HDBSCAN, they sort and 'unsort' the core distances based on color + * operation must have `gather` and `scatter` functions defined * @param[in] row_batch_size the batch size for computing nearest neighbors. This parameter controls * the number of samples for which the nearest neighbors are computed at once. Therefore, it affects * the memory consumption mainly by reducing the size of the adjacency matrix for masked nearest From 17a79a6c434f635b96b03c3df022516f74c1c8c7 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 17 Jul 2023 11:08:33 -0700 Subject: [PATCH 46/53] Added more tests for gather --- cpp/test/matrix/gather.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/test/matrix/gather.cu b/cpp/test/matrix/gather.cu index c9f3f727ef..b1228f05ca 100644 --- a/cpp/test/matrix/gather.cu +++ b/cpp/test/matrix/gather.cu @@ -210,10 +210,10 @@ const std::vector> inputs_i64 = {25, 2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); const std::vector> inplace_inputs_i32 = raft::util::itertools::product>( - {2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); + {25, 2000}, {6, 31, 129}, {11, 999}, {0, 1, 2, 3, 6, 100}, {1234ULL}); const std::vector> inplace_inputs_i64 = raft::util::itertools::product>( - {2000}, {6, 31, 129}, {11, 999}, {2, 3, 6}, {1234ULL}); + {25, 2000}, {6, 31, 129}, {11, 999}, {0, 1, 2, 3, 6, 100}, {1234ULL}); GATHER_TEST((GatherTest), GatherTestFU32I32, inputs_i32); GATHER_TEST((GatherTest), From a61df96eaa5a7c2270226e518677fef7a016c4ac Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 17 Jul 2023 11:14:01 -0700 Subject: [PATCH 47/53] Scatter tests --- cpp/test/matrix/scatter.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/test/matrix/scatter.cu b/cpp/test/matrix/scatter.cu index 195df07203..689924f28f 100644 --- a/cpp/test/matrix/scatter.cu +++ b/cpp/test/matrix/scatter.cu @@ -132,7 +132,7 @@ const std::vector> inputs_i32 = raft::util::itertools::product>({2000}, {6, 31, 129}, {2, 3, 6}, {1234ULL}); const std::vector> inputs_i64 = raft::util::itertools::product>( - {2000}, {6, 31, 129}, {2, 3, 6}, {1234ULL}); + {25, 2000}, {6, 31, 129}, {0, 1, 2, 3, 6, 100}, {1234ULL}); SCATTER_TEST((ScatterTest), ScatterTestFI32, inputs_i32); SCATTER_TEST((ScatterTest), ScatterTestFI64, inputs_i64); From 91ee0c760aebf0392f8fb5bdaca22188e94d60f0 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Thu, 20 Jul 2023 09:45:28 -0700 Subject: [PATCH 48/53] Update tests, reenaming API headers --- cpp/include/raft/cluster/detail/mst.cuh | 2 +- .../{connect_components.cuh => cross_component_nn.cuh} | 2 +- .../detail/{connect_components.cuh => cross_component_nn.cuh} | 0 .../{connect_components.cuh => cross_component_nn.cuh} | 4 ++-- cpp/test/CMakeLists.txt | 2 +- cpp/test/matrix/scatter.cu | 2 +- .../{connect_components.cu => cross_component_nn.cu} | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) rename cpp/include/raft/sparse/neighbors/{connect_components.cuh => cross_component_nn.cuh} (98%) rename cpp/include/raft/sparse/neighbors/detail/{connect_components.cuh => cross_component_nn.cuh} (100%) rename cpp/include/raft/sparse/selection/{connect_components.cuh => cross_component_nn.cuh} (90%) rename cpp/test/sparse/neighbors/{connect_components.cu => cross_component_nn.cu} (99%) diff --git a/cpp/include/raft/cluster/detail/mst.cuh b/cpp/include/raft/cluster/detail/mst.cuh index 31427fecfe..a962d4b7c6 100644 --- a/cpp/include/raft/cluster/detail/mst.cuh +++ b/cpp/include/raft/cluster/detail/mst.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/cpp/include/raft/sparse/neighbors/connect_components.cuh b/cpp/include/raft/sparse/neighbors/cross_component_nn.cuh similarity index 98% rename from cpp/include/raft/sparse/neighbors/connect_components.cuh rename to cpp/include/raft/sparse/neighbors/cross_component_nn.cuh index 399b67d5a7..c94c6254c3 100644 --- a/cpp/include/raft/sparse/neighbors/connect_components.cuh +++ b/cpp/include/raft/sparse/neighbors/cross_component_nn.cuh @@ -19,7 +19,7 @@ #include #include #include -#include +#include namespace raft::sparse::neighbors { diff --git a/cpp/include/raft/sparse/neighbors/detail/connect_components.cuh b/cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh similarity index 100% rename from cpp/include/raft/sparse/neighbors/detail/connect_components.cuh rename to cpp/include/raft/sparse/neighbors/detail/cross_component_nn.cuh diff --git a/cpp/include/raft/sparse/selection/connect_components.cuh b/cpp/include/raft/sparse/selection/cross_component_nn.cuh similarity index 90% rename from cpp/include/raft/sparse/selection/connect_components.cuh rename to cpp/include/raft/sparse/selection/cross_component_nn.cuh index 5a4908c8b8..e115d6c061 100644 --- a/cpp/include/raft/sparse/selection/connect_components.cuh +++ b/cpp/include/raft/sparse/selection/cross_component_nn.cuh @@ -19,7 +19,7 @@ */ /** - * DISCLAIMER: this file is deprecated: use connect_components.cuh instead + * DISCLAIMER: this file is deprecated: use cross_component_nn.cuh instead */ #pragma once @@ -28,7 +28,7 @@ " is deprecated and will be removed in a future release." \ " Please use the sparse/spatial version instead.") -#include +#include namespace raft::linkage { using raft::sparse::neighbors::cross_component_nn; diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 36640d0143..0d272095f3 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -304,7 +304,7 @@ if(BUILD_TESTS) NAME SPARSE_NEIGHBORS_TEST PATH - test/sparse/neighbors/connect_components.cu + test/sparse/neighbors/cross_component_nn.cu test/sparse/neighbors/brute_force.cu test/sparse/neighbors/knn_graph.cu OPTIONAL diff --git a/cpp/test/matrix/scatter.cu b/cpp/test/matrix/scatter.cu index 689924f28f..b703e3b00a 100644 --- a/cpp/test/matrix/scatter.cu +++ b/cpp/test/matrix/scatter.cu @@ -129,7 +129,7 @@ class ScatterTest : public ::testing::TestWithParam> { INSTANTIATE_TEST_CASE_P(ScatterTests, test_name, ::testing::ValuesIn(test_inputs)) const std::vector> inputs_i32 = - raft::util::itertools::product>({2000}, {6, 31, 129}, {2, 3, 6}, {1234ULL}); + raft::util::itertools::product>({25, 2000}, {6, 31, 129}, {0, 1, 2, 3, 6, 100}, {1234ULL}); const std::vector> inputs_i64 = raft::util::itertools::product>( {25, 2000}, {6, 31, 129}, {0, 1, 2, 3, 6, 100}, {1234ULL}); diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/cross_component_nn.cu similarity index 99% rename from cpp/test/sparse/neighbors/connect_components.cu rename to cpp/test/sparse/neighbors/cross_component_nn.cu index 4055969b65..97fc733836 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/cross_component_nn.cu @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include From 99900624c97a408033e95874056a7dcfef2d9cb2 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Thu, 20 Jul 2023 14:16:20 -0700 Subject: [PATCH 49/53] Stylegi --- cpp/test/matrix/scatter.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/test/matrix/scatter.cu b/cpp/test/matrix/scatter.cu index b703e3b00a..3a1a40086e 100644 --- a/cpp/test/matrix/scatter.cu +++ b/cpp/test/matrix/scatter.cu @@ -129,7 +129,8 @@ class ScatterTest : public ::testing::TestWithParam> { INSTANTIATE_TEST_CASE_P(ScatterTests, test_name, ::testing::ValuesIn(test_inputs)) const std::vector> inputs_i32 = - raft::util::itertools::product>({25, 2000}, {6, 31, 129}, {0, 1, 2, 3, 6, 100}, {1234ULL}); + raft::util::itertools::product>( + {25, 2000}, {6, 31, 129}, {0, 1, 2, 3, 6, 100}, {1234ULL}); const std::vector> inputs_i64 = raft::util::itertools::product>( {25, 2000}, {6, 31, 129}, {0, 1, 2, 3, 6, 100}, {1234ULL}); From 6ea871db0594ba6265a87b01ba23cd1afbe849ff Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Fri, 21 Jul 2023 09:51:25 -0700 Subject: [PATCH 50/53] remove EXPLICIT_INSTANTIATE --- cpp/test/sparse/neighbors/cross_component_nn.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/test/sparse/neighbors/cross_component_nn.cu b/cpp/test/sparse/neighbors/cross_component_nn.cu index 97fc733836..5c2caf1268 100644 --- a/cpp/test/sparse/neighbors/cross_component_nn.cu +++ b/cpp/test/sparse/neighbors/cross_component_nn.cu @@ -23,7 +23,6 @@ // separate translation unit for this test. // // TODO: edge case testing -#undef RAFT_EXPLICIT_INSTANTIATE_ONLY #include #include From 92b869774dcd32c89d94ba3c6af6bb8dd70c45bc Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Tue, 25 Jul 2023 10:48:10 -0700 Subject: [PATCH 51/53] remove todo --- cpp/test/sparse/neighbors/cross_component_nn.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/test/sparse/neighbors/cross_component_nn.cu b/cpp/test/sparse/neighbors/cross_component_nn.cu index 5c2caf1268..0cb3092263 100644 --- a/cpp/test/sparse/neighbors/cross_component_nn.cu +++ b/cpp/test/sparse/neighbors/cross_component_nn.cu @@ -21,8 +21,6 @@ // // TODO: consider adding this to libraft.so or creating an instance in a // separate translation unit for this test. -// -// TODO: edge case testing #include #include From 03d9ef41ccf24dfc44e052be5db28af6f1dbc110 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Tue, 25 Jul 2023 12:26:07 -0700 Subject: [PATCH 52/53] revert --- cpp/test/sparse/neighbors/cross_component_nn.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/test/sparse/neighbors/cross_component_nn.cu b/cpp/test/sparse/neighbors/cross_component_nn.cu index 0cb3092263..5c2caf1268 100644 --- a/cpp/test/sparse/neighbors/cross_component_nn.cu +++ b/cpp/test/sparse/neighbors/cross_component_nn.cu @@ -21,6 +21,8 @@ // // TODO: consider adding this to libraft.so or creating an instance in a // separate translation unit for this test. +// +// TODO: edge case testing #include #include From 2da5b8c72f25d16dd7ebebb8a0ea59bc34809a9c Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Tue, 25 Jul 2023 12:36:36 -0700 Subject: [PATCH 53/53] Update todo --- cpp/test/sparse/neighbors/cross_component_nn.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/test/sparse/neighbors/cross_component_nn.cu b/cpp/test/sparse/neighbors/cross_component_nn.cu index 5c2caf1268..7cadf25e88 100644 --- a/cpp/test/sparse/neighbors/cross_component_nn.cu +++ b/cpp/test/sparse/neighbors/cross_component_nn.cu @@ -22,7 +22,7 @@ // TODO: consider adding this to libraft.so or creating an instance in a // separate translation unit for this test. // -// TODO: edge case testing +// TODO: edge case testing. Reference: https://github.com/rapidsai/raft/issues/1669 #include #include