Hiding implementation details for lap, clustering, spectral, and label (#477)

Also managed to remove the raft host/device buffers in the process.

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Divye Gala (https://github.com/divyegala)

URL: #477
rapidsai · Feb 9, 2022 · 2ebf89c · 2ebf89c
1 parent 29718bd
commit 2ebf89c
Show file tree

Hide file tree

Showing 46 changed files with 697 additions and 784 deletions.
diff --git a/cpp/include/raft/spectral/kmeans.hpp → cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/spectral/kmeans.hpp → cpp/include/raft/cluster/detail/kmeans.cuh
@@ -32,13 +32,12 @@
 #include <raft/device_atomics.cuh>
 #include <raft/handle.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
-#include <raft/spectral/matrix_wrappers.hpp>
-#include <raft/spectral/warn_dbg.hpp>
+#include <raft/spectral/detail/matrix_wrappers.cuh>
+#include <raft/spectral/detail/warn_dbg.hpp>
 
-namespace {
-
-using namespace raft;
-using namespace raft::linalg;
+namespace raft {
+namespace cluster {
+namespace detail {
 // =========================================================
 // Useful grid settings
 // =========================================================
@@ -728,10 +727,6 @@ static int updateCentroids(handle_t const& handle,
   return 0;
 }
 
-}  // namespace
-
-namespace raft {
-
 // =========================================================
 // k-means algorithm
 // =========================================================
@@ -986,4 +981,6 @@ int kmeans(handle_t const& handle,
                                             seed);
 }
 
+}  // namespace detail
+}  // namespace cluster
 }  // namespace raft
diff --git a/cpp/include/raft/cluster/kmeans.hpp b/cpp/include/raft/cluster/kmeans.hpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <raft/cluster/detail/kmeans.cuh>
+
+namespace raft {
+namespace cluster {
+
+/**
+ *  @brief Find clusters with the k-means algorithm.
+ *    Initial centroids are chosen with the k-means++ algorithm. Empty
+ *    clusters are reinitialized by choosing new centroids with the
+ *    k-means++ algorithm.
+ *  @tparam index_type_t The type of data used for indexing.
+ *  @tparam value_type_t The type of data used for weights, distances.
+ *  @param handle The raft handle.
+ *  @param n Number of observation vectors.
+ *  @param d Dimension of observation vectors.
+ *  @param k Number of clusters.
+ *  @param tol Tolerance for convergence. k-means stops when the
+ *    change in residual divided by n is less than tol.
+ *  @param maxiter Maximum number of k-means iterations.
+ *  @param obs (Input, device memory, d*n entries) Observation
+ *    matrix. Matrix is stored column-major and each column is an
+ *    observation vector. Matrix dimensions are d x n.
+ *  @param codes (Output, device memory, n entries) Cluster
+ *    assignments.
+ *  @param residual On exit, residual sum of squares (sum of squares
+ *    of distances between observation vectors and centroids).
+ *  @param iters On exit, number of k-means iterations.
+ *  @param seed Random seed to be used (defaults to 123456).
+ *  @return Error flag, forwarded unchanged from detail::kmeans.
+ */
+template <typename index_type_t, typename value_type_t>
+int kmeans(handle_t const& handle,
+           index_type_t n,
+           index_type_t d,
+           index_type_t k,
+           value_type_t tol,
+           index_type_t maxiter,
+           const value_type_t* __restrict__ obs,
+           index_type_t* __restrict__ codes,
+           value_type_t& residual,
+           index_type_t& iters,
+           unsigned long long seed = 123456)
+{
+  return detail::kmeans<index_type_t, value_type_t>(
+    handle, n, d, k, tol, maxiter, obs, codes, residual, iters, seed);
+}
+}  // namespace cluster
+}  // namespace raft
diff --git a/cpp/include/raft/comms/helper.hpp b/cpp/include/raft/comms/helper.hpp
@@ -18,7 +18,6 @@
 
 #include <raft/comms/std_comms.hpp>
 #include <raft/handle.hpp>
-#include <raft/mr/device/buffer.hpp>
 
 #include <iostream>
 #include <nccl.h>

diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp
@@ -21,8 +21,6 @@
 #include <raft/comms/comms.hpp>
 #include <raft/comms/detail/std_comms.hpp>
 
-#include <raft/mr/device/buffer.hpp>
-
 #include <iostream>
 #include <nccl.h>
 #include <ucp/api/ucp.h>

diff --git a/cpp/include/raft/label/classlabels.hpp b/cpp/include/raft/label/classlabels.hpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/label/detail/classlabels.cuh>
+
+namespace raft {
+namespace label {
+
+/**
+ * Get unique class labels.
+ *
+ * The y array is assumed to store class labels. The unique values are selected
+ * from this array.
+ *
+ * @tparam value_t numeric type of the arrays with class labels
+ * @param [inout] unique output unique labels (documented as unallocated on entry)
+ * @param [in] y device array of labels, size [n]
+ * @param [in] n number of labels
+ * @param [in] stream cuda stream
+ * @returns the int returned by detail::getUniquelabels; presumably the number of
+ *   unique labels stored in `unique` on exit -- TODO confirm in the detail header
+ */
+template <typename value_t>
+int getUniquelabels(rmm::device_uvector<value_t>& unique, value_t* y, size_t n, cudaStream_t stream)
+{
+  return detail::getUniquelabels<value_t>(unique, y, n, stream);
+}
+
+/**
+ * Assign one-versus-rest labels.
+ *
+ * The output labels will have values +/-1:
+ * y_out = (y == y_unique[idx]) ? +1 : -1;
+ *
+ * The output type is currently set to value_t, but for SVM in principle we are
+ * free to choose another type for y_out (it should represent +/-1, and it is
+ * used in floating point arithmetic).
+ *
+ * @param [in] y device array of input labels, size [n]
+ * @param [in] n number of labels
+ * @param [in] y_unique device array of unique labels, size [n_classes]
+ * @param [in] n_classes number of unique labels
+ * @param [out] y_out device array of output labels
+ * @param [in] idx index of unique label that should be labeled as 1
+ * @param [in] stream cuda stream
+ */
+template <typename value_t>
+void getOvrlabels(
+  value_t* y, int n, value_t* y_unique, int n_classes, value_t* y_out, int idx, cudaStream_t stream)
+{
+  detail::getOvrlabels<value_t>(y, n, y_unique, n_classes, y_out, idx, stream);
+}
+/**
+ * Maps an input array containing a series of numbers into a new array
+ * where numbers have been mapped to a monotonically increasing set
+ * of labels. This can be useful in machine learning algorithms, for instance,
+ * where a given set of labels is not taken from a monotonically increasing
+ * set. This can happen if they are filtered or if only a subset of the
+ * total labels is used in a dataset. This is also useful in graph algorithms
+ * where a set of vertices needs to be labeled in a monotonically increasing
+ * order.
+ * @tparam Type the numeric type of the input and output arrays
+ * @tparam Lambda the type of the filter function, which determines
+ * which items in the array to map.
+ * @param[out] out the output monotonic array
+ * @param[in] in input label array
+ * @param[in] N number of elements in the input array
+ * @param[in] stream cuda stream to use
+ * @param[in] filter_op a function for specifying which values
+ * should have monotonically increasing labels applied to them.
+ * @param[in] zero_based force monotonic set to start at 0? (defaults to false)
+ */
+template <typename Type, typename Lambda>
+void make_monotonic(
+  Type* out, Type* in, size_t N, cudaStream_t stream, Lambda filter_op, bool zero_based = false)
+{
+  detail::make_monotonic<Type, Lambda>(out, in, N, stream, filter_op, zero_based);
+}
+
+/**
+ * Maps an input array containing a series of numbers into a new array
+ * where numbers have been mapped to a monotonically increasing set
+ * of labels. This can be useful in machine learning algorithms, for instance,
+ * where a given set of labels is not taken from a monotonically increasing
+ * set. This can happen if they are filtered or if only a subset of the
+ * total labels is used in a dataset. This is also useful in graph algorithms
+ * where a set of vertices needs to be labeled in a monotonically increasing
+ * order.
+ * @tparam Type the numeric type of the input and output arrays
+ * @param[out] out output label array with labels assigned monotonically
+ * @param[in] in input label array
+ * @param[in] N number of elements in the input array
+ * @param[in] stream cuda stream to use
+ * @param[in] zero_based force monotonic label set to start at 0? (defaults to false)
+ */
+template <typename Type>
+void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zero_based = false)
+{
+  detail::make_monotonic<Type>(out, in, N, stream, zero_based);
+}
+};  // namespace label
+};  // end namespace raft
diff --git a/cpp/include/raft/label/classlabels.cuh → ...include/raft/label/detail/classlabels.cuh b/cpp/include/raft/label/classlabels.cuh → ...include/raft/label/detail/classlabels.cuh
@@ -26,6 +26,7 @@
 
 namespace raft {
 namespace label {
+namespace detail {
 
 /**
  * Get unique class labels.
@@ -194,5 +195,7 @@ void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zer
   make_monotonic<Type>(
     out, in, N, stream, [] __device__(Type val) { return false; }, zero_based);
 }
+
+};  // namespace detail
 };  // namespace label
 };  // end namespace raft
diff --git a/cpp/include/raft/label/merge_labels.cuh → ...nclude/raft/label/detail/merge_labels.cuh b/cpp/include/raft/label/merge_labels.cuh → ...nclude/raft/label/detail/merge_labels.cuh
@@ -25,6 +25,7 @@
 
 namespace raft {
 namespace label {
+namespace detail {
 
 /** Note: this is one possible implementation where we represent the label
  *  equivalence graph implicitly using labels_a, labels_b and mask.
@@ -153,5 +154,6 @@ void merge_labels(value_idx* labels_a,
   RAFT_CUDA_TRY(cudaPeekAtLastError());
 }
 
+}  // namespace detail
 };  // namespace label
 };  // namespace raft
diff --git a/cpp/include/raft/label/merge_labels.hpp b/cpp/include/raft/label/merge_labels.hpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/label/detail/merge_labels.cuh>
+
+namespace raft {
+namespace label {
+
+/**
+ * @brief Merge two labellings in-place, according to a core mask
+ *
+ * A labelling is a representation of disjoint sets (groups) where points that
+ * belong to the same group have the same label. It is assumed that group
+ * labels take values between 1 and N. Labels relate to points, i.e. a label i+1
+ * means that you belong to the same group as the point i.
+ * The special value MAX_LABEL is used to mark points that are not labelled.
+ *
+ * The two label arrays A and B induce two sets of groups over points 0..N-1.
+ * If a point is labelled i in A and j in B and the mask is true for this
+ * point, then i and j are equivalent labels and their groups are merged by
+ * relabeling the elements of both groups to have the same label. The new label
+ * is the smaller one from the original labels.
+ * It is required that if the mask is true for a point, this point is labelled
+ * (i.e. its label is different from the special value MAX_LABEL).
+ *
+ * One use case is finding connected components: the two input label arrays can
+ * represent the connected components of graphs G_A and G_B, and the output
+ * would be the connected component labels of G_A \union G_B.
+ *
+ * @param[inout] labels_a    First input, and output label array (in-place)
+ * @param[in]    labels_b    Second input label array
+ * @param[in]    mask        Core point mask
+ * @param[out]   R           label equivalence map
+ * @param[in]    m           Working flag
+ * @param[in]    N           Number of points in the dataset
+ * @param[in]    stream      CUDA stream
+ */
+template <typename value_idx = int, int TPB_X = 256>
+void merge_labels(value_idx* labels_a,
+                  const value_idx* labels_b,
+                  const bool* mask,
+                  value_idx* R,
+                  bool* m,
+                  value_idx N,
+                  cudaStream_t stream)
+{
+  detail::merge_labels<value_idx, TPB_X>(labels_a, labels_b, mask, R, m, N, stream);
+}
+
+};  // namespace label
+};  // namespace raft
diff --git a/cpp/include/raft/lap/d_structs.h → cpp/include/raft/lap/detail/d_structs.h b/cpp/include/raft/lap/d_structs.h → cpp/include/raft/lap/detail/d_structs.h
diff --git a/cpp/include/raft/lap/lap_functions.cuh → ...include/raft/lap/detail/lap_functions.cuh b/cpp/include/raft/lap/lap_functions.cuh → ...include/raft/lap/detail/lap_functions.cuh
@@ -28,7 +28,7 @@
 
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
-#include <raft/lap/lap_kernels.cuh>
+#include <raft/lap/detail/lap_kernels.cuh>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 

diff --git a/cpp/include/raft/lap/lap_kernels.cuh → cpp/include/raft/lap/detail/lap_kernels.cuh b/cpp/include/raft/lap/lap_kernels.cuh → cpp/include/raft/lap/detail/lap_kernels.cuh
@@ -28,7 +28,6 @@
 
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
-#include <raft/mr/device/buffer.hpp>
 
 #include <thrust/for_each.h>
 

diff --git a/cpp/include/raft/lap/lap.cuh → cpp/include/raft/lap/lap.hpp b/cpp/include/raft/lap/lap.cuh → cpp/include/raft/lap/lap.hpp
@@ -27,8 +27,8 @@
 #include <raft/handle.hpp>
 #include <rmm/device_uvector.hpp>
 
-#include "d_structs.h"
-#include "lap_functions.cuh"
+#include "detail/d_structs.h"
+#include "detail/lap_functions.cuh"
 
 namespace raft {
 namespace lap {

diff --git a/cpp/include/raft/linalg/detail/lanczos.hpp b/cpp/include/raft/linalg/detail/lanczos.hpp
@@ -28,9 +28,9 @@
 #include "cublas_wrappers.hpp"
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
-#include <raft/spectral/lapack.hpp>
-#include <raft/spectral/matrix_wrappers.hpp>
-#include <raft/spectral/warn_dbg.hpp>
+#include <raft/spectral/detail/lapack.hpp>
+#include <raft/spectral/detail/matrix_wrappers.cuh>
+#include <raft/spectral/detail/warn_dbg.hpp>
 
 namespace raft {