From 2ebf89c3969d23f8f394a409909997c8d9593ae1 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@users.noreply.github.com>
Date: Tue, 8 Feb 2022 22:19:20 -0500
Subject: [PATCH] Hiding implementation details for lap, clustering, spectral,
 and label (#477)

Also managed to remove the raft host/device buffers in the process

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Divye Gala (https://github.com/divyegala)

URL: https://github.com/rapidsai/raft/pull/477
---
 .../kmeans.hpp => cluster/detail/kmeans.cuh}  |  17 +-
 cpp/include/raft/cluster/kmeans.hpp           |  65 ++++++
 cpp/include/raft/comms/helper.hpp             |   1 -
 cpp/include/raft/comms/std_comms.hpp          |   2 -
 cpp/include/raft/label/classlabels.hpp        | 117 ++++++++++
 .../raft/label/{ => detail}/classlabels.cuh   |   3 +
 .../raft/label/{ => detail}/merge_labels.cuh  |   2 +
 cpp/include/raft/label/merge_labels.hpp       |  66 ++++++
 cpp/include/raft/lap/{ => detail}/d_structs.h |   0
 .../raft/lap/{ => detail}/lap_functions.cuh   |   2 +-
 .../raft/lap/{ => detail}/lap_kernels.cuh     |   1 -
 cpp/include/raft/lap/{lap.cuh => lap.hpp}     |   4 +-
 cpp/include/raft/linalg/detail/lanczos.hpp    |   6 +-
 cpp/include/raft/mr/buffer_base.hpp           | 211 ------------------
 cpp/include/raft/mr/device/buffer.hpp         |  70 ------
 cpp/include/raft/mr/host/buffer.hpp           |  85 -------
 .../raft/sparse/distance/detail/coo_spmv.cuh  |   1 -
 .../raft/sparse/distance/detail/utils.cuh     |   1 -
 .../sparse/hierarchy/detail/agglomerative.cuh |   1 -
 .../hierarchy/detail/connectivities.cuh       |   1 -
 .../raft/sparse/hierarchy/detail/mst.cuh      |   1 -
 .../raft/sparse/linalg/detail/spectral.cuh    |   8 +-
 cpp/include/raft/sparse/op/detail/reduce.cuh  |   1 -
 .../selection/detail/connect_components.cuh   |   3 +-
 .../raft/sparse/selection/detail/knn.cuh      |   1 -
 cpp/include/raft/spatial/knn/ann.hpp          |   2 -
 cpp/include/raft/spatial/knn/knn.hpp          |   4 -
 cpp/include/raft/spectral/cluster_solvers.hpp |  28 ++-
 .../raft/spectral/{ => detail}/lapack.hpp     |   0
 .../matrix_wrappers.cuh}                      |   0
 .../detail/modularity_maximization.hpp        | 188 ++++++++++++++++
 .../raft/spectral/detail/partition.hpp        | 182 +++++++++++++++
 .../spectral_util.cuh}                        |   0
 .../raft/spectral/{ => detail}/warn_dbg.hpp   |   0
 cpp/include/raft/spectral/eigen_solvers.hpp   |   3 +
 .../raft/spectral/modularity_maximization.hpp | 110 +--------
 cpp/include/raft/spectral/partition.hpp       |  94 +-------
 cpp/test/CMakeLists.txt                       |   2 -
 cpp/test/cluster_solvers.cu                   |  10 +-
 cpp/test/eigen_solvers.cu                     |  17 +-
 cpp/test/label/label.cu                       |   2 +-
 cpp/test/label/merge_labels.cu                |   2 +-
 cpp/test/lap/lap.cu                           |   2 +-
 cpp/test/mr/device/buffer.cpp                 |  92 --------
 cpp/test/mr/host/buffer.cpp                   |  71 ------
 cpp/test/spectral_matrix.cu                   |   2 +-
 46 files changed, 697 insertions(+), 784 deletions(-)
 rename cpp/include/raft/{spectral/kmeans.hpp => cluster/detail/kmeans.cuh} (99%)
 create mode 100644 cpp/include/raft/cluster/kmeans.hpp
 create mode 100644 cpp/include/raft/label/classlabels.hpp
 rename cpp/include/raft/label/{ => detail}/classlabels.cuh (99%)
 rename cpp/include/raft/label/{ => detail}/merge_labels.cuh (99%)
 create mode 100644 cpp/include/raft/label/merge_labels.hpp
 rename cpp/include/raft/lap/{ => detail}/d_structs.h (100%)
 rename cpp/include/raft/lap/{ => detail}/lap_functions.cuh (99%)
 rename cpp/include/raft/lap/{ => detail}/lap_kernels.cuh (99%)
 rename cpp/include/raft/lap/{lap.cuh => lap.hpp} (99%)
 delete mode 100644 cpp/include/raft/mr/buffer_base.hpp
 delete mode 100644 cpp/include/raft/mr/device/buffer.hpp
 delete mode 100644 cpp/include/raft/mr/host/buffer.hpp
 rename cpp/include/raft/spectral/{ => detail}/lapack.hpp (100%)
 rename cpp/include/raft/spectral/{matrix_wrappers.hpp => detail/matrix_wrappers.cuh} (100%)
 create mode 100644 cpp/include/raft/spectral/detail/modularity_maximization.hpp
 create mode 100644 cpp/include/raft/spectral/detail/partition.hpp
 rename cpp/include/raft/spectral/{spectral_util.hpp => detail/spectral_util.cuh} (100%)
 rename cpp/include/raft/spectral/{ => detail}/warn_dbg.hpp (100%)
 delete mode 100644 cpp/test/mr/device/buffer.cpp
 delete mode 100644 cpp/test/mr/host/buffer.cpp

diff --git a/cpp/include/raft/spectral/kmeans.hpp b/cpp/include/raft/cluster/detail/kmeans.cuh
similarity index 99%
rename from cpp/include/raft/spectral/kmeans.hpp
rename to cpp/include/raft/cluster/detail/kmeans.cuh
index 56f4022a8c..5f1a0e137d 100644
--- a/cpp/include/raft/spectral/kmeans.hpp
+++ b/cpp/include/raft/cluster/detail/kmeans.cuh
@@ -32,13 +32,12 @@
 #include <raft/device_atomics.cuh>
 #include <raft/handle.hpp>
 #include <raft/linalg/detail/cublas_wrappers.hpp>
-#include <raft/spectral/matrix_wrappers.hpp>
-#include <raft/spectral/warn_dbg.hpp>
+#include <raft/spectral/detail/matrix_wrappers.cuh>
+#include <raft/spectral/detail/warn_dbg.hpp>
 
-namespace {
-
-using namespace raft;
-using namespace raft::linalg;
+namespace raft {
+namespace cluster {
+namespace detail {
 // =========================================================
 // Useful grid settings
 // =========================================================
@@ -728,10 +727,6 @@ static int updateCentroids(handle_t const& handle,
   return 0;
 }
 
-}  // namespace
-
-namespace raft {
-
 // =========================================================
 // k-means algorithm
 // =========================================================
@@ -986,4 +981,6 @@ int kmeans(handle_t const& handle,
                                             seed);
 }
 
+}  // namespace detail
+}  // namespace cluster
 }  // namespace raft
diff --git a/cpp/include/raft/cluster/kmeans.hpp b/cpp/include/raft/cluster/kmeans.hpp
new file mode 100644
index 0000000000..ab0fbb04c7
--- /dev/null
+++ b/cpp/include/raft/cluster/kmeans.hpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <raft/cluster/detail/kmeans.cuh>
+
+namespace raft {
+namespace cluster {
+
+/**
+ *  @brief Find clusters with k-means algorithm.
+ *    Initial centroids are chosen with k-means++ algorithm. Empty
+ *    clusters are reinitialized by choosing new centroids with
+ *    k-means++ algorithm.
+ *  @tparam index_type_t the type of data used for indexing.
+ *  @tparam value_type_t the type of data used for weights, distances.
+ *  @param handle the raft handle.
+ *  @param n Number of observation vectors.
+ *  @param d Dimension of observation vectors.
+ *  @param k Number of clusters.
+ *  @param tol Tolerance for convergence. k-means stops when the
+ *    change in residual divided by n is less than tol.
+ *  @param maxiter Maximum number of k-means iterations.
+ *  @param obs (Input, device memory, d*n entries) Observation
+ *    matrix. Matrix is stored column-major and each column is an
+ *    observation vector. Matrix dimensions are d x n.
+ *  @param codes (Output, device memory, n entries) Cluster
+ *    assignments.
+ *  @param residual On exit, residual sum of squares (sum of squares
+ *    of distances between observation vectors and centroids).
+ *  @param iters on exit, number of k-means iterations.
+ *  @param seed random seed to be used.
+ *  @return error flag
+ */
+template <typename index_type_t, typename value_type_t>
+int kmeans(handle_t const& handle,
+           index_type_t n,
+           index_type_t d,
+           index_type_t k,
+           value_type_t tol,
+           index_type_t maxiter,
+           const value_type_t* __restrict__ obs,
+           index_type_t* __restrict__ codes,
+           value_type_t& residual,
+           index_type_t& iters,
+           unsigned long long seed = 123456)
+{
+  return detail::kmeans<index_type_t, value_type_t>(
+    handle, n, d, k, tol, maxiter, obs, codes, residual, iters, seed);
+}
+}  // namespace cluster
+}  // namespace raft
diff --git a/cpp/include/raft/comms/helper.hpp b/cpp/include/raft/comms/helper.hpp
index 09a767bea7..d83e8f4d4f 100644
--- a/cpp/include/raft/comms/helper.hpp
+++ b/cpp/include/raft/comms/helper.hpp
@@ -18,7 +18,6 @@
 
 #include <raft/comms/std_comms.hpp>
 #include <raft/handle.hpp>
-#include <raft/mr/device/buffer.hpp>
 
 #include <iostream>
 #include <nccl.h>
diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp
index f54535a88c..b4aa72d53e 100644
--- a/cpp/include/raft/comms/std_comms.hpp
+++ b/cpp/include/raft/comms/std_comms.hpp
@@ -21,8 +21,6 @@
 #include <raft/comms/comms.hpp>
 #include <raft/comms/detail/std_comms.hpp>
 
-#include <raft/mr/device/buffer.hpp>
-
 #include <iostream>
 #include <nccl.h>
 #include <ucp/api/ucp.h>
diff --git a/cpp/include/raft/label/classlabels.hpp b/cpp/include/raft/label/classlabels.hpp
new file mode 100644
index 0000000000..de9f60518d
--- /dev/null
+++ b/cpp/include/raft/label/classlabels.hpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/label/detail/classlabels.cuh>
+
+namespace raft {
+namespace label {
+
+/**
+ * Get unique class labels.
+ *
+ * The y array is assumed to store class labels. The unique values are selected
+ * from this array.
+ *
+ * @tparam value_t numeric type of the arrays with class labels
+ * @param [inout] unique output unique labels
+ * @param [in] y device array of labels, size [n]
+ * @param [in] n number of labels
+ * @param [in] stream cuda stream
+ * @returns unique device array of unique labels, unallocated on entry,
+ *   on exit it has size
+ */
+template <typename value_t>
+int getUniquelabels(rmm::device_uvector<value_t>& unique, value_t* y, size_t n, cudaStream_t stream)
+{
+  return detail::getUniquelabels<value_t>(unique, y, n, stream);
+}
+
+/**
+ * Assign one versus rest labels.
+ *
+ * The output labels will have values +/-1:
+ * y_out = (y == y_unique[idx]) ? +1 : -1;
+ *
+ * The output type currently is set to value_t, but for SVM in principle we are
+ * free to choose other type for y_out (it should represent +/-1, and it is used
+ * in floating point arithmetics).
+ *
+ * @param [in] y device array if input labels, size [n]
+ * @param [in] n number of labels
+ * @param [in] y_unique device array of unique labels, size [n_classes]
+ * @param [in] n_classes number of unique labels
+ * @param [out] y_out device array of output labels
+ * @param [in] idx index of unique label that should be labeled as 1
+ * @param [in] stream cuda stream
+ */
+template <typename value_t>
+void getOvrlabels(
+  value_t* y, int n, value_t* y_unique, int n_classes, value_t* y_out, int idx, cudaStream_t stream)
+{
+  detail::getOvrlabels<value_t>(y, n, y_unique, n_classes, y_out, idx, stream);
+}
+/**
+ * Maps an input array containing a series of numbers into a new array
+ * where numbers have been mapped to a monotonically increasing set
+ * of labels. This can be useful in machine learning algorithms, for instance,
+ * where a given set of labels is not taken from a monotonically increasing
+ * set. This can happen if they are filtered or if only a subset of the
+ * total labels are used in a dataset. This is also useful in graph algorithms
+ * where a set of vertices need to be labeled in a monotonically increasing
+ * order.
+ * @tparam Type the numeric type of the input and output arrays
+ * @tparam Lambda the type of an optional filter function, which determines
+ * which items in the array to map.
+ * @param[out] out the output monotonic array
+ * @param[in] in input label array
+ * @param[in] N number of elements in the input array
+ * @param[in] stream cuda stream to use
+ * @param[in] filter_op an optional function for specifying which values
+ * should have monotonically increasing labels applied to them.
+ * @param[in] zero_based force monotonic set to start at 0?
+ */
+template <typename Type, typename Lambda>
+void make_monotonic(
+  Type* out, Type* in, size_t N, cudaStream_t stream, Lambda filter_op, bool zero_based = false)
+{
+  detail::make_monotonic<Type, Lambda>(out, in, N, stream, filter_op, zero_based);
+}
+
+/**
+ * Maps an input array containing a series of numbers into a new array
+ * where numbers have been mapped to a monotonically increasing set
+ * of labels. This can be useful in machine learning algorithms, for instance,
+ * where a given set of labels is not taken from a monotonically increasing
+ * set. This can happen if they are filtered or if only a subset of the
+ * total labels are used in a dataset. This is also useful in graph algorithms
+ * where a set of vertices need to be labeled in a monotonically increasing
+ * order.
+ * @tparam Type the numeric type of the input and output arrays
+ * @param[out] out output label array with labels assigned monotonically
+ * @param[in] in input label array
+ * @param[in] N number of elements in the input array
+ * @param[in] stream cuda stream to use
+ * @param[in] zero_based force monotonic label set to start at 0?
+ */
+template <typename Type>
+void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zero_based = false)
+{
+  detail::make_monotonic<Type>(out, in, N, stream, zero_based);
+}
+};  // namespace label
+};  // end namespace raft
diff --git a/cpp/include/raft/label/classlabels.cuh b/cpp/include/raft/label/detail/classlabels.cuh
similarity index 99%
rename from cpp/include/raft/label/classlabels.cuh
rename to cpp/include/raft/label/detail/classlabels.cuh
index fda4c02a1c..c805860759 100644
--- a/cpp/include/raft/label/classlabels.cuh
+++ b/cpp/include/raft/label/detail/classlabels.cuh
@@ -26,6 +26,7 @@
 
 namespace raft {
 namespace label {
+namespace detail {
 
 /**
  * Get unique class labels.
@@ -194,5 +195,7 @@ void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zer
   make_monotonic<Type>(
     out, in, N, stream, [] __device__(Type val) { return false; }, zero_based);
 }
+
+};  // namespace detail
 };  // namespace label
 };  // end namespace raft
diff --git a/cpp/include/raft/label/merge_labels.cuh b/cpp/include/raft/label/detail/merge_labels.cuh
similarity index 99%
rename from cpp/include/raft/label/merge_labels.cuh
rename to cpp/include/raft/label/detail/merge_labels.cuh
index 9cd5a29951..bf03d1c738 100644
--- a/cpp/include/raft/label/merge_labels.cuh
+++ b/cpp/include/raft/label/detail/merge_labels.cuh
@@ -25,6 +25,7 @@
 
 namespace raft {
 namespace label {
+namespace detail {
 
 /** Note: this is one possible implementation where we represent the label
  *  equivalence graph implicitly using labels_a, labels_b and mask.
@@ -153,5 +154,6 @@ void merge_labels(value_idx* labels_a,
   RAFT_CUDA_TRY(cudaPeekAtLastError());
 }
 
+}  // namespace detail
 };  // namespace label
 };  // namespace raft
\ No newline at end of file
diff --git a/cpp/include/raft/label/merge_labels.hpp b/cpp/include/raft/label/merge_labels.hpp
new file mode 100644
index 0000000000..5ba8fe8a27
--- /dev/null
+++ b/cpp/include/raft/label/merge_labels.hpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/label/detail/merge_labels.cuh>
+
+namespace raft {
+namespace label {
+
+/**
+ * @brief Merge two labellings in-place, according to a core mask
+ *
+ * A labelling is a representation of disjoint sets (groups) where points that
+ * belong to the same group have the same label. It is assumed that group
+ * labels take values between 1 and N. labels relate to points, i.e a label i+1
+ * means that you belong to the same group as the point i.
+ * The special value MAX_LABEL is used to mark points that are not labelled.
+ *
+ * The two label arrays A and B induce two sets of groups over points 0..N-1.
+ * If a point is labelled i in A and j in B and the mask is true for this
+ * point, then i and j are equivalent labels and their groups are merged by
+ * relabeling the elements of both groups to have the same label. The new label
+ * is the smaller one from the original labels.
+ * It is required that if the mask is true for a point, this point is labelled
+ * (i.e its label is different than the special value MAX_LABEL).
+ *
+ * One use case is finding connected components: the two input label arrays can
+ * represent the connected components of graphs G_A and G_B, and the output
+ * would be the connected components labels of G_A \union G_B.
+ *
+ * @param[inout] labels_a    First input, and output label array (in-place)
+ * @param[in]    labels_b    Second input label array
+ * @param[in]    mask        Core point mask
+ * @param[out]   R           label equivalence map
+ * @param[in]    m           Working flag
+ * @param[in]    N           Number of points in the dataset
+ * @param[in]    stream      CUDA stream
+ */
+template <typename value_idx = int, int TPB_X = 256>
+void merge_labels(value_idx* labels_a,
+                  const value_idx* labels_b,
+                  const bool* mask,
+                  value_idx* R,
+                  bool* m,
+                  value_idx N,
+                  cudaStream_t stream)
+{
+  detail::merge_labels<value_idx, TPB_X>(labels_a, labels_b, mask, R, m, N, stream);
+}
+
+};  // namespace label
+};  // namespace raft
\ No newline at end of file
diff --git a/cpp/include/raft/lap/d_structs.h b/cpp/include/raft/lap/detail/d_structs.h
similarity index 100%
rename from cpp/include/raft/lap/d_structs.h
rename to cpp/include/raft/lap/detail/d_structs.h
diff --git a/cpp/include/raft/lap/lap_functions.cuh b/cpp/include/raft/lap/detail/lap_functions.cuh
similarity index 99%
rename from cpp/include/raft/lap/lap_functions.cuh
rename to cpp/include/raft/lap/detail/lap_functions.cuh
index ab4aa2df59..6c6b09e5d8 100644
--- a/cpp/include/raft/lap/lap_functions.cuh
+++ b/cpp/include/raft/lap/detail/lap_functions.cuh
@@ -28,7 +28,7 @@
 
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
-#include <raft/lap/lap_kernels.cuh>
+#include <raft/lap/detail/lap_kernels.cuh>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 
diff --git a/cpp/include/raft/lap/lap_kernels.cuh b/cpp/include/raft/lap/detail/lap_kernels.cuh
similarity index 99%
rename from cpp/include/raft/lap/lap_kernels.cuh
rename to cpp/include/raft/lap/detail/lap_kernels.cuh
index 328cbf3e74..b61d0bd269 100644
--- a/cpp/include/raft/lap/lap_kernels.cuh
+++ b/cpp/include/raft/lap/detail/lap_kernels.cuh
@@ -28,7 +28,6 @@
 
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
-#include <raft/mr/device/buffer.hpp>
 
 #include <thrust/for_each.h>
 
diff --git a/cpp/include/raft/lap/lap.cuh b/cpp/include/raft/lap/lap.hpp
similarity index 99%
rename from cpp/include/raft/lap/lap.cuh
rename to cpp/include/raft/lap/lap.hpp
index 42b898ebff..2350ebcddf 100644
--- a/cpp/include/raft/lap/lap.cuh
+++ b/cpp/include/raft/lap/lap.hpp
@@ -27,8 +27,8 @@
 #include <raft/handle.hpp>
 #include <rmm/device_uvector.hpp>
 
-#include "d_structs.h"
-#include "lap_functions.cuh"
+#include "detail/d_structs.h"
+#include "detail/lap_functions.cuh"
 
 namespace raft {
 namespace lap {
diff --git a/cpp/include/raft/linalg/detail/lanczos.hpp b/cpp/include/raft/linalg/detail/lanczos.hpp
index 3d8fde7e68..a2b7751a05 100644
--- a/cpp/include/raft/linalg/detail/lanczos.hpp
+++ b/cpp/include/raft/linalg/detail/lanczos.hpp
@@ -28,9 +28,9 @@
 #include "cublas_wrappers.hpp"
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
-#include <raft/spectral/lapack.hpp>
-#include <raft/spectral/matrix_wrappers.hpp>
-#include <raft/spectral/warn_dbg.hpp>
+#include <raft/spectral/detail/lapack.hpp>
+#include <raft/spectral/detail/matrix_wrappers.cuh>
+#include <raft/spectral/detail/warn_dbg.hpp>
 
 namespace raft {
 
diff --git a/cpp/include/raft/mr/buffer_base.hpp b/cpp/include/raft/mr/buffer_base.hpp
deleted file mode 100644
index 11724bed00..0000000000
--- a/cpp/include/raft/mr/buffer_base.hpp
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <raft/cudart_utils.h>
-
-#include <cuda_runtime.h>
-
-#include <cstddef>
-#include <memory>
-#include <utility>
-
-namespace raft {
-namespace mr {
-
-/**
- * @brief Base for all RAII-based owning of temporary memory allocations. This
- *        class should ideally not be used by users directly, but instead via
- *        the child classes `device_buffer` and `host_buffer`.
- *
- * @tparam T          data type
- * @tparam AllocatorT The underly allocator object
- */
-template <typename T, typename AllocatorT>
-class buffer_base {
- public:
-  using size_type       = std::size_t;
-  using value_type      = T;
-  using iterator        = value_type*;
-  using const_iterator  = const value_type*;
-  using reference       = T&;
-  using const_reference = const T&;
-
-  buffer_base() = delete;
-
-  buffer_base(const buffer_base& other) = delete;
-
-  buffer_base& operator=(const buffer_base& other) = delete;
-
-  /**
-   * @brief Main ctor
-   *
-   * @param[in] allocator asynchronous allocator used for managing buffer life
-   * @param[in] stream    cuda stream where this allocation operations are async
-   * @param[in] n         size of the buffer (in number of elements)
-   */
-  buffer_base(std::shared_ptr<AllocatorT> allocator, cudaStream_t stream, size_type n = 0)
-    : data_(nullptr), size_(n), capacity_(n), stream_(stream), allocator_(std::move(allocator))
-  {
-    if (capacity_ > 0) {
-      data_ =
-        static_cast<value_type*>(allocator_->allocate(capacity_ * sizeof(value_type), stream_));
-      RAFT_CUDA_TRY(cudaStreamSynchronize(stream_));
-    }
-  }
-
-  ~buffer_base() { release(); }
-
-  value_type* data() { return data_; }
-
-  const value_type* data() const { return data_; }
-
-  size_type size() const { return size_; }
-
-  void clear() { size_ = 0; }
-
-  iterator begin() { return data_; }
-
-  const_iterator begin() const { return data_; }
-
-  iterator end() { return data_ + size_; }
-
-  const_iterator end() const { return data_ + size_; }
-
-  /**
-   * @brief Reserve new memory size for this buffer.
-   *
-   * It re-allocates a fresh buffer if the new requested capacity is more than
-   * the current one, copies the old buffer contents to this new buffer and
-   * removes the old one.
-   *
-   * @param[in] new_capacity new capacity (in number of elements)
-   * @{
-   */
-  void reserve(size_type new_capacity)
-  {
-    if (new_capacity > capacity_) {
-      auto* new_data =
-        static_cast<value_type*>(allocator_->allocate(new_capacity * sizeof(value_type), stream_));
-      if (size_ > 0) { raft::copy(new_data, data_, size_, stream_); }
-      // Only deallocate if we have allocated a pointer
-      if (nullptr != data_) {
-        allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
-      }
-      data_     = new_data;
-      capacity_ = new_capacity;
-    }
-  }
-
-  void reserve(size_type new_capacity, cudaStream_t stream)
-  {
-    set_stream(stream);
-    reserve(new_capacity);
-  }
-  /** @} */
-
-  /**
-   * @brief Resize the underlying buffer (uses `reserve` method internally)
-   *
-   * @param[in] new_size new buffer size
-   * @{
-   */
-  void resize(const size_type new_size)
-  {
-    reserve(new_size);
-    size_ = new_size;
-  }
-
-  void resize(const size_type new_size, cudaStream_t stream)
-  {
-    set_stream(stream);
-    resize(new_size);
-  }
-  /** @} */
-
-  /**
-   * @brief Deletes the underlying buffer
-   *
-   * If this method is not explicitly called, it will be during the destructor
-   * @{
-   */
-  void release()
-  {
-    if (nullptr != data_) {
-      allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
-    }
-    data_     = nullptr;
-    capacity_ = 0;
-    size_     = 0;
-  }
-
-  void release(cudaStream_t stream)
-  {
-    set_stream(stream);
-    release();
-  }
-  /** @} */
-
-  /**
-   * @brief returns the underlying allocator used
-   *
-   * @return the allocator pointer
-   */
-  std::shared_ptr<AllocatorT> get_allocator() const { return allocator_; }
-
-  /**
-   * @brief returns the underlying stream used
-   *
-   * @return the cuda stream
-   */
-  cudaStream_t get_stream() const { return stream_; }
-
- protected:
-  value_type* data_;
-
- private:
-  size_type size_;
-  size_type capacity_;
-  cudaStream_t stream_;
-  std::shared_ptr<AllocatorT> allocator_;
-
-  /**
-   * @brief Sets a new cuda stream where the future operations will be queued
-   *
-   * This method makes sure that the inter-stream dependencies are met and taken
-   * care of, before setting the input stream as a new stream for this buffer.
-   * Ideally, the same cuda stream passed during constructor is expected to be
-   * used throughout this buffer's lifetime, for performance.
-   *
-   * @param[in] stream new cuda stream to be set. If it is the same as the
-   *                   current one, then this method will be a no-op.
-   */
-  void set_stream(cudaStream_t stream)
-  {
-    if (stream_ != stream) {
-      cudaEvent_t event;
-      RAFT_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
-      RAFT_CUDA_TRY(cudaEventRecord(event, stream_));
-      RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0));
-      stream_ = stream;
-      RAFT_CUDA_TRY(cudaEventDestroy(event));
-    }
-  }
-};  // class buffer_base
-
-};  // namespace mr
-};  // namespace raft
diff --git a/cpp/include/raft/mr/device/buffer.hpp b/cpp/include/raft/mr/device/buffer.hpp
deleted file mode 100644
index 9b5ff11c50..0000000000
--- a/cpp/include/raft/mr/device/buffer.hpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "allocator.hpp"
-#include <memory>
-#include <raft/mr/buffer_base.hpp>
-
-namespace raft {
-namespace mr {
-namespace device {
-
-/**
- * @brief RAII object owning a contiguous typed device buffer. The passed in
- *        allocator supports asynchronous allocation and deallocation so this
- *        can also be used for temporary memory
- *
- * @code{.cpp}
- * template<typename T>
- * void foo(..., cudaStream_t stream) {
- *   ...
- *   raft::mr::device::buffer<T> temp(stream, 0);
- *   ...
- *   temp.resize(n);
- *   kernelA<<<grid,block,0,stream>>>(...,temp.data(),...);
- *   kernelB<<<grid,block,0,stream>>>(...,temp.data(),...);
- *   temp.release();
- *   ...
- * }
- * @endcode
- */
-template <typename T>
-class buffer : public buffer_base<T, allocator> {
- public:
-  using size_type       = typename buffer_base<T, allocator>::size_type;
-  using value_type      = typename buffer_base<T, allocator>::value_type;
-  using iterator        = typename buffer_base<T, allocator>::iterator;
-  using const_iterator  = typename buffer_base<T, allocator>::const_iterator;
-  using reference       = typename buffer_base<T, allocator>::reference;
-  using const_reference = typename buffer_base<T, allocator>::const_reference;
-
-  buffer() = delete;
-
-  buffer(const buffer& other) = delete;
-
-  buffer& operator=(const buffer& other) = delete;
-
-  buffer(std::shared_ptr<allocator> alloc, cudaStream_t stream, size_type n = 0)
-    : buffer_base<T, device::allocator>(alloc, stream, n)
-  {
-  }
-};  // class buffer
-
-};  // namespace device
-};  // namespace mr
-};  // namespace raft
diff --git a/cpp/include/raft/mr/host/buffer.hpp b/cpp/include/raft/mr/host/buffer.hpp
deleted file mode 100644
index 204b384719..0000000000
--- a/cpp/include/raft/mr/host/buffer.hpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "allocator.hpp"
-#include <memory>
-#include <raft/mr/buffer_base.hpp>
-#include <raft/mr/device/buffer.hpp>
-
-namespace raft {
-namespace mr {
-namespace host {
-
-/**
- * @brief RAII object owning a contigous typed host buffer (aka pinned memory).
- *        The passed in allocator supports asynchronus allocation and
- *        deallocation so this can also be used for temporary memory
- *
- * @code{.cpp}
- * template<typename T>
- * void foo(const T* in_d , T* out_d, ..., cudaStream_t stream) {
- *   ...
- *   raft::mr::host::buffer<T> temp(stream, 0);
- *   ...
- *   temp.resize(n);
- *   raft::copy(temp.data(), in_d, temp.size());
- *   ...
- *   raft::copy(out_d, temp.data(), temp.size());
- *   temp.release(stream);
- *   ...
- * }
- * @endcode
- */
-template <typename T>
-class buffer : public buffer_base<T, allocator> {
- public:
-  using size_type       = typename buffer_base<T, allocator>::size_type;
-  using value_type      = typename buffer_base<T, allocator>::value_type;
-  using iterator        = typename buffer_base<T, allocator>::iterator;
-  using const_iterator  = typename buffer_base<T, allocator>::const_iterator;
-  using reference       = typename buffer_base<T, allocator>::reference;
-  using const_reference = typename buffer_base<T, allocator>::const_reference;
-
-  buffer() = delete;
-
-  buffer(const buffer& other) = delete;
-
-  buffer& operator=(const buffer& other) = delete;
-
-  buffer(std::shared_ptr<allocator> alloc, const device::buffer<T>& other)
-    : buffer_base<T, allocator>(alloc, other.get_stream(), other.size())
-  {
-    if (other.size() > 0) { raft::copy(data_, other.data(), other.size(), other.get_stream()); }
-  }
-
-  buffer(std::shared_ptr<allocator> alloc, cudaStream_t stream, size_type n = 0)
-    : buffer_base<T, allocator>(alloc, stream, n)
-  {
-  }
-
-  reference operator[](size_type pos) { return data_[pos]; }
-
-  const_reference operator[](size_type pos) const { return data_[pos]; }
-
- private:
-  using buffer_base<T, allocator>::data_;
-};
-
-};  // namespace host
-};  // namespace mr
-};  // namespace raft
diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh
index 046b65a0f0..020de9e014 100644
--- a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh
+++ b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh
@@ -21,7 +21,6 @@
 
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/mr/device/buffer.hpp>
 #include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include "../../csr.hpp"
diff --git a/cpp/include/raft/sparse/distance/detail/utils.cuh b/cpp/include/raft/sparse/distance/detail/utils.cuh
index 8c01b33c1e..06c034ad9f 100644
--- a/cpp/include/raft/sparse/distance/detail/utils.cuh
+++ b/cpp/include/raft/sparse/distance/detail/utils.cuh
@@ -17,7 +17,6 @@
 #pragma once
 
 #include <raft/mr/device/allocator.hpp>
-#include <raft/mr/device/buffer.hpp>
 
 #include <cub/cub.cuh>
 
diff --git a/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh b/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh
index 31ebe38d85..105f1cc9f6 100644
--- a/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh
+++ b/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh
@@ -19,7 +19,6 @@
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
-#include <raft/mr/device/buffer.hpp>
 
 #include <rmm/device_uvector.hpp>
 
diff --git a/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh b/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh
index bd96ca8649..fe58246545 100644
--- a/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh
+++ b/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh
@@ -24,7 +24,6 @@
 #include <rmm/device_uvector.hpp>
 
 #include <raft/distance/distance_type.hpp>
-#include <raft/mr/device/buffer.hpp>
 #include <raft/sparse/convert/csr.hpp>
 #include <raft/sparse/coo.hpp>
 #include <raft/sparse/hierarchy/common.h>
diff --git a/cpp/include/raft/sparse/hierarchy/detail/mst.cuh b/cpp/include/raft/sparse/hierarchy/detail/mst.cuh
index 7173c76c08..10e9d04c0d 100644
--- a/cpp/include/raft/sparse/hierarchy/detail/mst.cuh
+++ b/cpp/include/raft/sparse/hierarchy/detail/mst.cuh
@@ -19,7 +19,6 @@
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
 
-#include <raft/mr/device/buffer.hpp>
 #include <raft/sparse/mst/mst.cuh>
 #include <raft/sparse/op/sort.hpp>
 #include <raft/sparse/selection/connect_components.hpp>
diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh
index 5b43798e2e..95d9c0d1c5 100644
--- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh
+++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh
@@ -17,7 +17,8 @@
 #include <raft/cudart_utils.h>
 
 #include <raft/cuda_utils.cuh>
-#include <raft/sparse/detail/cusparse_wrappers.h>
+#include <raft/spectral/cluster_solvers.hpp>
+#include <raft/spectral/eigen_solvers.hpp>
 #include <raft/spectral/partition.hpp>
 #include <rmm/device_uvector.hpp>
 
@@ -70,11 +71,12 @@ void fit_embedding(const raft::handle_t& handle,
   value_type tol          = 0.01;
   index_type restart_iter = 15 + neigvs;  // what cugraph is using
 
-  raft::eigen_solver_config_t<index_type, value_type> cfg{neigvs, maxiter, restart_iter, tol};
+  raft::spectral::eigen_solver_config_t<index_type, value_type> cfg{
+    neigvs, maxiter, restart_iter, tol};
 
   cfg.seed = seed;
 
-  raft::lanczos_solver_t<index_type, value_type> eig_solver{cfg};
+  raft::spectral::lanczos_solver_t<index_type, value_type> eig_solver{cfg};
 
   // cluster computation here is irrelevant,
   // hence define a no-op such solver to
diff --git a/cpp/include/raft/sparse/op/detail/reduce.cuh b/cpp/include/raft/sparse/op/detail/reduce.cuh
index e4a64fbb51..988f478f2b 100644
--- a/cpp/include/raft/sparse/op/detail/reduce.cuh
+++ b/cpp/include/raft/sparse/op/detail/reduce.cuh
@@ -20,7 +20,6 @@
 
 #include <raft/cuda_utils.cuh>
 #include <raft/cudart_utils.h>
-#include <raft/mr/device/buffer.hpp>
 #include <raft/sparse/detail/cusparse_wrappers.h>
 
 #include <raft/device_atomics.cuh>
diff --git a/cpp/include/raft/sparse/selection/detail/connect_components.cuh b/cpp/include/raft/sparse/selection/detail/connect_components.cuh
index afbb7f17b3..2b9ca2d8b5 100644
--- a/cpp/include/raft/sparse/selection/detail/connect_components.cuh
+++ b/cpp/include/raft/sparse/selection/detail/connect_components.cuh
@@ -17,9 +17,8 @@
 #include <cub/cub.cuh>
 
 #include <raft/distance/fused_l2_nn.hpp>
-#include <raft/label/classlabels.cuh>
+#include <raft/label/classlabels.hpp>
 #include <raft/linalg/norm.hpp>
-#include <raft/mr/device/buffer.hpp>
 #include <raft/sparse/convert/csr.hpp>
 #include <raft/sparse/coo.hpp>
 #include <raft/sparse/linalg/symmetrize.hpp>
diff --git a/cpp/include/raft/sparse/selection/detail/knn.cuh b/cpp/include/raft/sparse/selection/detail/knn.cuh
index 3de10a2782..82a689fe00 100644
--- a/cpp/include/raft/sparse/selection/detail/knn.cuh
+++ b/cpp/include/raft/sparse/selection/detail/knn.cuh
@@ -23,7 +23,6 @@
 #include <raft/distance/distance_type.hpp>
 #include <raft/linalg/unary_op.hpp>
 #include <raft/matrix/matrix.hpp>
-#include <raft/mr/device/buffer.hpp>
 
 #include <raft/sparse/coo.hpp>
 #include <raft/sparse/csr.hpp>
diff --git a/cpp/include/raft/spatial/knn/ann.hpp b/cpp/include/raft/spatial/knn/ann.hpp
index 6ce9463e43..5f64a8d1b5 100644
--- a/cpp/include/raft/spatial/knn/ann.hpp
+++ b/cpp/include/raft/spatial/knn/ann.hpp
@@ -22,8 +22,6 @@
 #include <faiss/gpu/GpuIndex.h>
 #include <raft/spatial/knn/faiss_mr.hpp>
 
-#include <raft/mr/device/buffer.hpp>
-
 namespace raft {
 namespace spatial {
 namespace knn {
diff --git a/cpp/include/raft/spatial/knn/knn.hpp b/cpp/include/raft/spatial/knn/knn.hpp
index b29c4cc51c..59df75ba36 100644
--- a/cpp/include/raft/spatial/knn/knn.hpp
+++ b/cpp/include/raft/spatial/knn/knn.hpp
@@ -19,14 +19,10 @@
 #include "detail/knn_brute_force_faiss.cuh"
 #include "detail/selection_faiss.cuh"
 
-#include <raft/mr/device/buffer.hpp>
-
 namespace raft {
 namespace spatial {
 namespace knn {
 
-using deviceAllocator = raft::mr::device::allocator;
-
 /**
  * Performs a k-select across row partitioned index/distance
  * matrices formatted like the following:
diff --git a/cpp/include/raft/spectral/cluster_solvers.hpp b/cpp/include/raft/spectral/cluster_solvers.hpp
index 221a9679d4..cc25e66cae 100644
--- a/cpp/include/raft/spectral/cluster_solvers.hpp
+++ b/cpp/include/raft/spectral/cluster_solvers.hpp
@@ -15,10 +15,11 @@
  */
 #pragma once
 
-#include <raft/spectral/kmeans.hpp>
+#include <raft/cluster/kmeans.hpp>
 #include <utility>  // for std::pair
 
 namespace raft {
+namespace spectral {
 
 using namespace matrix;
 
@@ -52,17 +53,18 @@ struct kmeans_solver_t {
     RAFT_EXPECTS(codes != nullptr, "Null codes buffer.");
     value_type_t residual{};
     index_type_t iters{};
-    kmeans(handle,
-           n_obs_vecs,
-           dim,
-           config_.n_clusters,
-           config_.tol,
-           config_.maxIter,
-           obs,
-           codes,
-           residual,
-           iters,
-           config_.seed);
+
+    raft::cluster::kmeans(handle,
+                          n_obs_vecs,
+                          dim,
+                          config_.n_clusters,
+                          config_.tol,
+                          config_.maxIter,
+                          obs,
+                          codes,
+                          residual,
+                          iters,
+                          config_.seed);
     return std::make_pair(residual, iters);
   }
 
@@ -71,4 +73,6 @@ struct kmeans_solver_t {
  private:
   cluster_solver_config_t<index_type_t, value_type_t, size_type_t> config_;
 };
+
+}  // namespace spectral
 }  // namespace raft
diff --git a/cpp/include/raft/spectral/lapack.hpp b/cpp/include/raft/spectral/detail/lapack.hpp
similarity index 100%
rename from cpp/include/raft/spectral/lapack.hpp
rename to cpp/include/raft/spectral/detail/lapack.hpp
diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.cuh
similarity index 100%
rename from cpp/include/raft/spectral/matrix_wrappers.hpp
rename to cpp/include/raft/spectral/detail/matrix_wrappers.cuh
diff --git a/cpp/include/raft/spectral/detail/modularity_maximization.hpp b/cpp/include/raft/spectral/detail/modularity_maximization.hpp
new file mode 100644
index 0000000000..a55dfbe67f
--- /dev/null
+++ b/cpp/include/raft/spectral/detail/modularity_maximization.hpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <math.h>
+#include <stdio.h>
+
+#include <cuda.h>
+#include <thrust/fill.h>
+#include <thrust/reduce.h>
+#include <thrust/transform.h>
+
+#include <tuple>
+
+#include <raft/spectral/cluster_solvers.hpp>
+#include <raft/spectral/detail/spectral_util.cuh>
+#include <raft/spectral/eigen_solvers.hpp>
+
+#ifdef COLLECT_TIME_STATISTICS
+#include <cuda_profiler_api.h>
+#include <stddef.h>
+#include <sys/resource.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#endif
+
+#ifdef COLLECT_TIME_STATISTICS
+static double timer(void)
+{
+  struct timeval tv;
+  cudaDeviceSynchronize();
+  gettimeofday(&tv, NULL);
+  return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
+}
+#endif
+
+namespace raft {
+namespace spectral {
+namespace detail {
+
+using namespace matrix;
+using namespace linalg;
+
+// =========================================================
+// Spectral modularity_maximization
+// =========================================================
+
+/** Compute partition for a weighted undirected graph. This
+ *  partition attempts to minimize the cost function:
+ *    Cost = \sum_i (Edges cut by ith partition)/(Vertices in ith partition)
+ *
+ *  @param G Weighted graph in CSR format
+ *  @param nClusters Number of partitions.
+ *  @param nEigVecs Number of eigenvectors to compute.
+ *  @param maxIter_lanczos Maximum number of Lanczos iterations.
+ *  @param restartIter_lanczos Maximum size of Lanczos system before
+ *    implicit restart.
+ *  @param tol_lanczos Convergence tolerance for Lanczos method.
+ *  @param maxIter_kmeans Maximum number of k-means iterations.
+ *  @param tol_kmeans Convergence tolerance for k-means algorithm.
+ *  @param clusters (Output, device memory, n entries) Cluster
+ *    assignments.
+ *  @param iters_lanczos On exit, number of Lanczos iterations
+ *    performed.
+ *  @param iters_kmeans On exit, number of k-means iterations
+ *    performed.
+ *  @return error flag.
+ */
+template <typename vertex_t, typename weight_t, typename EigenSolver, typename ClusterSolver>
+std::tuple<vertex_t, weight_t, vertex_t> modularity_maximization(
+  handle_t const& handle,
+  sparse_matrix_t<vertex_t, weight_t> const& csr_m,
+  EigenSolver const& eigen_solver,
+  ClusterSolver const& cluster_solver,
+  vertex_t* __restrict__ clusters,
+  weight_t* eigVals,
+  weight_t* eigVecs)
+{
+  RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer.");
+  RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer.");
+  RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer.");
+
+  auto stream   = handle.get_stream();
+  auto cublas_h = handle.get_cublas_handle();
+
+  std::tuple<vertex_t, weight_t, vertex_t>
+    stats;  // # iters eigen solver, cluster solver residual, # iters cluster solver
+
+  vertex_t n = csr_m.nrows_;
+
+  // Compute eigenvectors of Modularity Matrix
+
+  // Initialize Modularity Matrix
+  modularity_matrix_t<vertex_t, weight_t> B{handle, csr_m};
+
+  auto eigen_config = eigen_solver.get_config();
+  auto nEigVecs     = eigen_config.n_eigVecs;
+
+  // Compute eigenvectors corresponding to largest eigenvalues
+  std::get<0>(stats) = eigen_solver.solve_largest_eigenvectors(handle, B, eigVals, eigVecs);
+
+  // Whiten eigenvector matrix
+  transform_eigen_matrix(handle, n, nEigVecs, eigVecs);
+
+  // notice that at this point the matrix has already been transposed, so we are scaling
+  // columns
+  scale_obs(nEigVecs, n, eigVecs);
+  RAFT_CHECK_CUDA(stream);
+
+  // Find partition clustering
+  auto pair_cluster = cluster_solver.solve(handle, n, nEigVecs, eigVecs, clusters);
+
+  std::get<1>(stats) = pair_cluster.first;
+  std::get<2>(stats) = pair_cluster.second;
+
+  return stats;
+}
+//===================================================
+// Analysis of graph partition
+// =========================================================
+
+/// Compute modularity
+/** This function determines the modularity based on a graph and cluster assignments
+ *  @param G Weighted graph in CSR format
+ *  @param nClusters Number of clusters.
+ *  @param clusters (Input, device memory, n entries) Cluster assignments.
+ *  @param modularity On exit, modularity
+ */
+template <typename vertex_t, typename weight_t>
+void analyzeModularity(handle_t const& handle,
+                       sparse_matrix_t<vertex_t, weight_t> const& csr_m,
+                       vertex_t nClusters,
+                       vertex_t const* __restrict__ clusters,
+                       weight_t& modularity)
+{
+  RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer.");
+
+  vertex_t i;
+  vertex_t n = csr_m.nrows_;
+  weight_t partModularity, clustersize;
+
+  auto cublas_h = handle.get_cublas_handle();
+  auto stream   = handle.get_stream();
+
+  // Device memory
+  vector_t<weight_t> part_i(handle, n);
+  vector_t<weight_t> Bx(handle, n);
+
+  // Initialize cuBLAS
+  RAFT_CUBLAS_TRY(cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream));
+
+  // Initialize Modularity
+  modularity_matrix_t<vertex_t, weight_t> B{handle, csr_m};
+
+  // Initialize output
+  modularity = 0;
+
+  // Iterate through partitions
+  for (i = 0; i < nClusters; ++i) {
+    if (!construct_indicator(handle, i, n, clustersize, partModularity, clusters, part_i, Bx, B)) {
+      WARNING("empty partition");
+      continue;
+    }
+
+    // Record results
+    modularity += partModularity;
+  }
+
+  modularity = modularity / B.diagonal_.nrm1();
+}
+
+}  // namespace detail
+}  // namespace spectral
+}  // namespace raft
diff --git a/cpp/include/raft/spectral/detail/partition.hpp b/cpp/include/raft/spectral/detail/partition.hpp
new file mode 100644
index 0000000000..b7c811d5a5
--- /dev/null
+++ b/cpp/include/raft/spectral/detail/partition.hpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <math.h>
+#include <stdio.h>
+
+#include <cuda.h>
+#include <thrust/fill.h>
+#include <thrust/reduce.h>
+#include <thrust/transform.h>
+
+#include <tuple>
+
+#include <raft/spectral/cluster_solvers.hpp>
+#include <raft/spectral/detail/spectral_util.cuh>
+#include <raft/spectral/eigen_solvers.hpp>
+
+namespace raft {
+namespace spectral {
+namespace detail {
+
+using namespace matrix;
+using namespace linalg;
+
+// =========================================================
+// Spectral partitioner
+// =========================================================
+
+/// Compute spectral graph partition
+/** Compute partition for a weighted undirected graph. This
+ *  partition attempts to minimize the cost function:
+ *    Cost = \sum_i (Edges cut by ith partition)/(Vertices in ith partition)
+ *
+ *  @param G Weighted graph in CSR format
+ *  @param nClusters Number of partitions.
+ *  @param nEigVecs Number of eigenvectors to compute.
+ *  @param maxIter_lanczos Maximum number of Lanczos iterations.
+ *  @param restartIter_lanczos Maximum size of Lanczos system before
+ *    implicit restart.
+ *  @param tol_lanczos Convergence tolerance for Lanczos method.
+ *  @param maxIter_kmeans Maximum number of k-means iterations.
+ *  @param tol_kmeans Convergence tolerance for k-means algorithm.
+ *  @param clusters (Output, device memory, n entries) Partition
+ *    assignments.
+ *  @param iters_lanczos On exit, number of Lanczos iterations
+ *    performed.
+ *  @param iters_kmeans On exit, number of k-means iterations
+ *    performed.
+ *  @return statistics: number of eigensolver iterations, .
+ */
+template <typename vertex_t, typename weight_t, typename EigenSolver, typename ClusterSolver>
+std::tuple<vertex_t, weight_t, vertex_t> partition(handle_t const& handle,
+                                                   sparse_matrix_t<vertex_t, weight_t> const& csr_m,
+                                                   EigenSolver const& eigen_solver,
+                                                   ClusterSolver const& cluster_solver,
+                                                   vertex_t* __restrict__ clusters,
+                                                   weight_t* eigVals,
+                                                   weight_t* eigVecs)
+{
+  RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer.");
+  RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer.");
+  RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer.");
+
+  auto stream   = handle.get_stream();
+  auto cublas_h = handle.get_cublas_handle();
+
+  std::tuple<vertex_t, weight_t, vertex_t>
+    stats;  //{iters_eig_solver,residual_cluster,iters_cluster_solver} // # iters eigen solver,
+            // cluster solver residual, # iters cluster solver
+
+  vertex_t n = csr_m.nrows_;
+
+  // -------------------------------------------------------
+  // Spectral partitioner
+  // -------------------------------------------------------
+
+  // Compute eigenvectors of Laplacian
+
+  // Initialize Laplacian
+  /// sparse_matrix_t<vertex_t, weight_t> A{handle, graph};
+  laplacian_matrix_t<vertex_t, weight_t> L{handle, csr_m};
+
+  auto eigen_config = eigen_solver.get_config();
+  auto nEigVecs     = eigen_config.n_eigVecs;
+
+  // Compute smallest eigenvalues and eigenvectors
+  std::get<0>(stats) = eigen_solver.solve_smallest_eigenvectors(handle, L, eigVals, eigVecs);
+
+  // Whiten eigenvector matrix
+  transform_eigen_matrix(handle, n, nEigVecs, eigVecs);
+
+  // Find partition clustering
+  auto pair_cluster = cluster_solver.solve(handle, n, nEigVecs, eigVecs, clusters);
+
+  std::get<1>(stats) = pair_cluster.first;
+  std::get<2>(stats) = pair_cluster.second;
+
+  return stats;
+}
+
+// =========================================================
+// Analysis of graph partition
+// =========================================================
+
+/// Compute cost function for partition
+/** This function determines the edges cut by a partition and a cost
+ *  function:
+ *    Cost = \sum_i (Edges cut by ith partition)/(Vertices in ith partition)
+ *  Graph is assumed to be weighted and undirected.
+ *
+ *  @param G Weighted graph in CSR format
+ *  @param nClusters Number of partitions.
+ *  @param clusters (Input, device memory, n entries) Partition
+ *    assignments.
+ *  @param edgeCut On exit, weight of edges cut by partition.
+ *  @param cost On exit, partition cost function.
+ *  @return error flag.
+ */
+template <typename vertex_t, typename weight_t>
+void analyzePartition(handle_t const& handle,
+                      sparse_matrix_t<vertex_t, weight_t> const& csr_m,
+                      vertex_t nClusters,
+                      const vertex_t* __restrict__ clusters,
+                      weight_t& edgeCut,
+                      weight_t& cost)
+{
+  RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer.");
+
+  vertex_t i;
+  vertex_t n = csr_m.nrows_;
+
+  auto stream   = handle.get_stream();
+  auto cublas_h = handle.get_cublas_handle();
+
+  weight_t partEdgesCut, clustersize;
+
+  // Device memory
+  vector_t<weight_t> part_i(handle, n);
+  vector_t<weight_t> Lx(handle, n);
+
+  // Initialize cuBLAS
+  RAFT_CUBLAS_TRY(cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream));
+
+  // Initialize Laplacian
+  /// sparse_matrix_t<vertex_t, weight_t> A{handle, graph};
+  laplacian_matrix_t<vertex_t, weight_t> L{handle, csr_m};
+
+  // Initialize output
+  cost    = 0;
+  edgeCut = 0;
+
+  // Iterate through partitions
+  for (i = 0; i < nClusters; ++i) {
+    // Construct indicator vector for ith partition
+    if (!construct_indicator(handle, i, n, clustersize, partEdgesCut, clusters, part_i, Lx, L)) {
+      WARNING("empty partition");
+      continue;
+    }
+
+    // Record results
+    cost += partEdgesCut / clustersize;
+    edgeCut += partEdgesCut / 2;
+  }
+}
+
+}  // namespace detail
+}  // namespace spectral
+}  // namespace raft
diff --git a/cpp/include/raft/spectral/spectral_util.hpp b/cpp/include/raft/spectral/detail/spectral_util.cuh
similarity index 100%
rename from cpp/include/raft/spectral/spectral_util.hpp
rename to cpp/include/raft/spectral/detail/spectral_util.cuh
diff --git a/cpp/include/raft/spectral/warn_dbg.hpp b/cpp/include/raft/spectral/detail/warn_dbg.hpp
similarity index 100%
rename from cpp/include/raft/spectral/warn_dbg.hpp
rename to cpp/include/raft/spectral/detail/warn_dbg.hpp
diff --git a/cpp/include/raft/spectral/eigen_solvers.hpp b/cpp/include/raft/spectral/eigen_solvers.hpp
index 156b996586..192dc15a6b 100644
--- a/cpp/include/raft/spectral/eigen_solvers.hpp
+++ b/cpp/include/raft/spectral/eigen_solvers.hpp
@@ -18,6 +18,7 @@
 #include <raft/linalg/lanczos.hpp>
 
 namespace raft {
+namespace spectral {
 
 using namespace matrix;
 
@@ -95,4 +96,6 @@ struct lanczos_solver_t {
  private:
   eigen_solver_config_t<index_type_t, value_type_t, size_type_t> config_;
 };
+
+}  // namespace spectral
 }  // namespace raft
diff --git a/cpp/include/raft/spectral/modularity_maximization.hpp b/cpp/include/raft/spectral/modularity_maximization.hpp
index 8188a772b8..466851c74f 100644
--- a/cpp/include/raft/spectral/modularity_maximization.hpp
+++ b/cpp/include/raft/spectral/modularity_maximization.hpp
@@ -16,44 +16,13 @@
 
 #pragma once
 
-#include <math.h>
-#include <stdio.h>
-
-#include <cuda.h>
-#include <thrust/fill.h>
-#include <thrust/reduce.h>
-#include <thrust/transform.h>
-
 #include <tuple>
 
-#include <raft/spectral/cluster_solvers.hpp>
-#include <raft/spectral/eigen_solvers.hpp>
-#include <raft/spectral/spectral_util.hpp>
-
-#ifdef COLLECT_TIME_STATISTICS
-#include <cuda_profiler_api.h>
-#include <stddef.h>
-#include <sys/resource.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-#endif
-
-#ifdef COLLECT_TIME_STATISTICS
-static double timer(void)
-{
-  struct timeval tv;
-  cudaDeviceSynchronize();
-  gettimeofday(&tv, NULL);
-  return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
-}
-#endif
+#include <raft/spectral/detail/modularity_maximization.hpp>
 
 namespace raft {
 namespace spectral {
 
-using namespace matrix;
-using namespace linalg;
-
 // =========================================================
 // Spectral modularity_maximization
 // =========================================================
@@ -89,44 +58,8 @@ std::tuple<vertex_t, weight_t, vertex_t> modularity_maximization(
   weight_t* eigVals,
   weight_t* eigVecs)
 {
-  RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer.");
-  RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer.");
-  RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer.");
-
-  auto stream   = handle.get_stream();
-  auto cublas_h = handle.get_cublas_handle();
-
-  std::tuple<vertex_t, weight_t, vertex_t>
-    stats;  // # iters eigen solver, cluster solver residual, # iters cluster solver
-
-  vertex_t n = csr_m.nrows_;
-
-  // Compute eigenvectors of Modularity Matrix
-
-  // Initialize Modularity Matrix
-  modularity_matrix_t<vertex_t, weight_t> B{handle, csr_m};
-
-  auto eigen_config = eigen_solver.get_config();
-  auto nEigVecs     = eigen_config.n_eigVecs;
-
-  // Compute eigenvectors corresponding to largest eigenvalues
-  std::get<0>(stats) = eigen_solver.solve_largest_eigenvectors(handle, B, eigVals, eigVecs);
-
-  // Whiten eigenvector matrix
-  transform_eigen_matrix(handle, n, nEigVecs, eigVecs);
-
-  // notice that at this point the matrix has already been transposed, so we are scaling
-  // columns
-  scale_obs(nEigVecs, n, eigVecs);
-  RAFT_CHECK_CUDA(stream);
-
-  // Find partition clustering
-  auto pair_cluster = cluster_solver.solve(handle, n, nEigVecs, eigVecs, clusters);
-
-  std::get<1>(stats) = pair_cluster.first;
-  std::get<2>(stats) = pair_cluster.second;
-
-  return stats;
+  return detail::modularity_maximization<vertex_t, weight_t, EigenSolver, ClusterSolver>(
+    handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs);
 }
 //===================================================
 // Analysis of graph partition
@@ -146,42 +79,7 @@ void analyzeModularity(handle_t const& handle,
                        vertex_t const* __restrict__ clusters,
                        weight_t& modularity)
 {
-  RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer.");
-
-  vertex_t i;
-  vertex_t n = csr_m.nrows_;
-  weight_t partModularity, clustersize;
-
-  auto cublas_h = handle.get_cublas_handle();
-  auto stream   = handle.get_stream();
-
-  // Device memory
-  vector_t<weight_t> part_i(handle, n);
-  vector_t<weight_t> Bx(handle, n);
-
-  // Initialize cuBLAS
-  // #TODO: Use public API when ready
-  RAFT_CUBLAS_TRY(
-    raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream));
-
-  // Initialize Modularity
-  modularity_matrix_t<vertex_t, weight_t> B{handle, csr_m};
-
-  // Initialize output
-  modularity = 0;
-
-  // Iterate through partitions
-  for (i = 0; i < nClusters; ++i) {
-    if (!construct_indicator(handle, i, n, clustersize, partModularity, clusters, part_i, Bx, B)) {
-      WARNING("empty partition");
-      continue;
-    }
-
-    // Record results
-    modularity += partModularity;
-  }
-
-  modularity = modularity / B.diagonal_.nrm1();
+  detail::analyzeModularity<vertex_t, weight_t>(handle, csr_m, nClusters, clusters, modularity);
 }
 
 }  // namespace spectral
diff --git a/cpp/include/raft/spectral/partition.hpp b/cpp/include/raft/spectral/partition.hpp
index 5b1478baa9..597ef530a2 100644
--- a/cpp/include/raft/spectral/partition.hpp
+++ b/cpp/include/raft/spectral/partition.hpp
@@ -15,26 +15,13 @@
  */
 #pragma once
 
-#include <math.h>
-#include <stdio.h>
-
-#include <cuda.h>
-#include <thrust/fill.h>
-#include <thrust/reduce.h>
-#include <thrust/transform.h>
-
 #include <tuple>
 
-#include <raft/spectral/cluster_solvers.hpp>
-#include <raft/spectral/eigen_solvers.hpp>
-#include <raft/spectral/spectral_util.hpp>
+#include <raft/spectral/detail/partition.hpp>
 
 namespace raft {
 namespace spectral {
 
-using namespace matrix;
-using namespace linalg;
-
 // =========================================================
 // Spectral partitioner
 // =========================================================
@@ -70,45 +57,8 @@ std::tuple<vertex_t, weight_t, vertex_t> partition(handle_t const& handle,
                                                    weight_t* eigVals,
                                                    weight_t* eigVecs)
 {
-  RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer.");
-  RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer.");
-  RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer.");
-
-  auto stream   = handle.get_stream();
-  auto cublas_h = handle.get_cublas_handle();
-
-  std::tuple<vertex_t, weight_t, vertex_t>
-    stats;  //{iters_eig_solver,residual_cluster,iters_cluster_solver} // # iters eigen solver,
-            // cluster solver residual, # iters cluster solver
-
-  vertex_t n = csr_m.nrows_;
-
-  // -------------------------------------------------------
-  // Spectral partitioner
-  // -------------------------------------------------------
-
-  // Compute eigenvectors of Laplacian
-
-  // Initialize Laplacian
-  /// sparse_matrix_t<vertex_t, weight_t> A{handle, graph};
-  laplacian_matrix_t<vertex_t, weight_t> L{handle, csr_m};
-
-  auto eigen_config = eigen_solver.get_config();
-  auto nEigVecs     = eigen_config.n_eigVecs;
-
-  // Compute smallest eigenvalues and eigenvectors
-  std::get<0>(stats) = eigen_solver.solve_smallest_eigenvectors(handle, L, eigVals, eigVecs);
-
-  // Whiten eigenvector matrix
-  transform_eigen_matrix(handle, n, nEigVecs, eigVecs);
-
-  // Find partition clustering
-  auto pair_cluster = cluster_solver.solve(handle, n, nEigVecs, eigVecs, clusters);
-
-  std::get<1>(stats) = pair_cluster.first;
-  std::get<2>(stats) = pair_cluster.second;
-
-  return stats;
+  return detail::partition<vertex_t, weight_t, EigenSolver, ClusterSolver>(
+    handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs);
 }
 
 // =========================================================
@@ -137,43 +87,7 @@ void analyzePartition(handle_t const& handle,
                       weight_t& edgeCut,
                       weight_t& cost)
 {
-  RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer.");
-
-  vertex_t i;
-  vertex_t n = csr_m.nrows_;
-
-  auto stream   = handle.get_stream();
-  auto cublas_h = handle.get_cublas_handle();
-
-  weight_t partEdgesCut, clustersize;
-
-  // Device memory
-  vector_t<weight_t> part_i(handle, n);
-  vector_t<weight_t> Lx(handle, n);
-
-  // Initialize cuBLAS
-  RAFT_CUBLAS_TRY(cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream));
-
-  // Initialize Laplacian
-  /// sparse_matrix_t<vertex_t, weight_t> A{handle, graph};
-  laplacian_matrix_t<vertex_t, weight_t> L{handle, csr_m};
-
-  // Initialize output
-  cost    = 0;
-  edgeCut = 0;
-
-  // Iterate through partitions
-  for (i = 0; i < nClusters; ++i) {
-    // Construct indicator vector for ith partition
-    if (!construct_indicator(handle, i, n, clustersize, partEdgesCut, clusters, part_i, Lx, L)) {
-      WARNING("empty partition");
-      continue;
-    }
-
-    // Record results
-    cost += partEdgesCut / clustersize;
-    edgeCut += partEdgesCut / 2;
-  }
+  detail::analyzePartition<vertex_t, weight_t>(handle, csr_m, nClusters, clusters, edgeCut, cost);
 }
 
 }  // namespace spectral
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index a3df5c7a4b..fda60e1cb0 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -65,8 +65,6 @@ add_executable(test_raft
     test/matrix/math.cu
     test/matrix/matrix.cu
     test/matrix/linewise_op.cu
-    test/mr/device/buffer.cpp
-    test/mr/host/buffer.cpp
     test/mst.cu
     test/random/rng.cu
     test/random/rng_int.cu
diff --git a/cpp/test/cluster_solvers.cu b/cpp/test/cluster_solvers.cu
index 2c7996514a..0030596e21 100644
--- a/cpp/test/cluster_solvers.cu
+++ b/cpp/test/cluster_solvers.cu
@@ -19,9 +19,11 @@
 #include <memory>
 #include <raft/handle.hpp>
 
+#include <raft/spectral/cluster_solvers.hpp>
 #include <raft/spectral/modularity_maximization.hpp>
 
 namespace raft {
+namespace spectral {
 
 TEST(Raft, ClusterSolvers)
 {
@@ -60,7 +62,12 @@ TEST(Raft, ModularitySolvers)
   using value_type = double;
 
   handle_t h;
-  ASSERT_EQ(0, h.get_device());
+  ASSERT_EQ(0,
+            h.
+
+            get_device()
+
+  );
 
   index_type neigvs{10};
   index_type maxiter{100};
@@ -95,4 +102,5 @@ TEST(Raft, ModularitySolvers)
   EXPECT_ANY_THROW(spectral::analyzeModularity(h, sm, k, clusters, modularity));
 }
 
+}  // namespace spectral
 }  // namespace raft
diff --git a/cpp/test/eigen_solvers.cu b/cpp/test/eigen_solvers.cu
index f898d11d2e..541d4dccc8 100644
--- a/cpp/test/eigen_solvers.cu
+++ b/cpp/test/eigen_solvers.cu
@@ -16,6 +16,7 @@
 
 #include <raft/common/nvtx.hpp>
 #include <raft/handle.hpp>
+#include <raft/spectral/eigen_solvers.hpp>
 #include <raft/spectral/partition.hpp>
 
 #include <gtest/gtest.h>
@@ -25,6 +26,7 @@
 #include <memory>
 
 namespace raft {
+namespace spectral {
 
 TEST(Raft, EigenSolvers)
 {
@@ -34,7 +36,12 @@ TEST(Raft, EigenSolvers)
   using value_type = double;
 
   handle_t h;
-  ASSERT_EQ(0, h.get_device());
+  ASSERT_EQ(0,
+            h.
+
+            get_device()
+
+  );
 
   index_type* ro{nullptr};
   index_type* ci{nullptr};
@@ -75,7 +82,12 @@ TEST(Raft, SpectralSolvers)
   using value_type = double;
 
   handle_t h;
-  ASSERT_EQ(0, h.get_device());
+  ASSERT_EQ(0,
+            h.
+
+            get_device()
+
+  );
 
   index_type neigvs{10};
   index_type maxiter{100};
@@ -109,4 +121,5 @@ TEST(Raft, SpectralSolvers)
   EXPECT_ANY_THROW(spectral::analyzePartition(h, sm, k, clusters, edgeCut, cost));
 }
 
+}  // namespace spectral
 }  // namespace raft
diff --git a/cpp/test/label/label.cu b/cpp/test/label/label.cu
index d441bf95a8..b19accc3b4 100644
--- a/cpp/test/label/label.cu
+++ b/cpp/test/label/label.cu
@@ -16,7 +16,7 @@
 
 #include <gtest/gtest.h>
 
-#include <raft/label/classlabels.cuh>
+#include <raft/label/classlabels.hpp>
 
 #include "../test_utils.h"
 #include <raft/cuda_utils.cuh>
diff --git a/cpp/test/label/merge_labels.cu b/cpp/test/label/merge_labels.cu
index 5d30af795f..db6b34bbd6 100644
--- a/cpp/test/label/merge_labels.cu
+++ b/cpp/test/label/merge_labels.cu
@@ -15,7 +15,7 @@
  */
 
 #include <gtest/gtest.h>
-#include <raft/label/merge_labels.cuh>
+#include <raft/label/merge_labels.hpp>
 
 #include "../test_utils.h"
 #include <raft/cudart_utils.h>
diff --git a/cpp/test/lap/lap.cu b/cpp/test/lap/lap.cu
index afdebae1f8..24e1c6be4f 100644
--- a/cpp/test/lap/lap.cu
+++ b/cpp/test/lap/lap.cu
@@ -28,7 +28,7 @@
 
 #include <iostream>
 #include <omp.h>
-#include <raft/lap/lap.cuh>
+#include <raft/lap/lap.hpp>
 #include <random>
 
 #define PROBLEMSIZE  1000  // Number of rows/columns
diff --git a/cpp/test/mr/device/buffer.cpp b/cpp/test/mr/device/buffer.cpp
deleted file mode 100644
index 4861a4ca1f..0000000000
--- a/cpp/test/mr/device/buffer.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <iostream>
-#include <memory>
-#include <raft/cudart_utils.h>
-#include <rmm/device_uvector.hpp>
-#include <rmm/mr/device/limiting_resource_adaptor.hpp>
-
-namespace raft {
-namespace mr {
-namespace device {
-
-TEST(Raft, DeviceBufferAlloc)
-{
-  cudaStream_t stream;
-  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
-  // no allocation at construction
-  rmm::device_uvector<char> buff(0, stream);
-  ASSERT_EQ(0, buff.size());
-  // explicit allocation after construction
-  buff.resize(20, stream);
-  ASSERT_EQ(20, buff.size());
-  // resizing to a smaller buffer size
-  buff.resize(10, stream);
-  ASSERT_EQ(10, buff.size());
-  // explicit deallocation
-  buff.release();
-  ASSERT_EQ(0, buff.size());
-  // use these methods without the explicit stream parameter
-  buff.resize(20, stream);
-  ASSERT_EQ(20, buff.size());
-  buff.resize(10, stream);
-  ASSERT_EQ(10, buff.size());
-  buff.release();
-  ASSERT_EQ(0, buff.size());
-  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
-  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
-}
-
-TEST(Raft, DeviceBufferZeroResize)
-{
-  // Create a limiting_resource_adaptor to track allocations
-  auto curr_mr =
-    dynamic_cast<rmm::mr::cuda_memory_resource*>(rmm::mr::get_current_device_resource());
-  auto limit_mr =
-    std::make_shared<rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_memory_resource>>(curr_mr,
-                                                                                        1000);
-
-  rmm::mr::set_current_device_resource(limit_mr.get());
-
-  cudaStream_t stream;
-  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
-  // no allocation at construction
-  rmm::device_uvector<char> buff(10, stream);
-  ASSERT_EQ(10, buff.size());
-  // explicit allocation after construction
-  buff.resize(0, stream);
-  ASSERT_EQ(0, buff.size());
-  // resizing to a smaller buffer size
-  buff.resize(20, stream);
-  ASSERT_EQ(20, buff.size());
-  // explicit deallocation
-  buff.release();
-  ASSERT_EQ(0, buff.size());
-
-  // Now check that there is no memory left. (Used to not be true)
-  ASSERT_EQ(0, limit_mr->get_allocated_bytes());
-
-  rmm::mr::set_current_device_resource(curr_mr);
-
-  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
-  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
-}
-
-}  // namespace device
-}  // namespace mr
-}  // namespace raft
diff --git a/cpp/test/mr/host/buffer.cpp b/cpp/test/mr/host/buffer.cpp
deleted file mode 100644
index d645ffa0e0..0000000000
--- a/cpp/test/mr/host/buffer.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <iostream>
-#include <memory>
-#include <raft/mr/device/buffer.hpp>
-#include <raft/mr/host/buffer.hpp>
-
-namespace raft {
-namespace mr {
-namespace host {
-
-TEST(Raft, HostBuffer)
-{
-  auto alloc = std::make_shared<default_allocator>();
-  cudaStream_t stream;
-  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
-  // no allocation at construction
-  buffer<char> buff(alloc, stream);
-  ASSERT_EQ(0, buff.size());
-  // explicit allocation after construction
-  buff.resize(20, stream);
-  ASSERT_EQ(20, buff.size());
-  // resizing to a smaller buffer size
-  buff.resize(10, stream);
-  ASSERT_EQ(10, buff.size());
-  // explicit deallocation
-  buff.release(stream);
-  ASSERT_EQ(0, buff.size());
-  // use these methods without the explicit stream parameter
-  buff.resize(20);
-  ASSERT_EQ(20, buff.size());
-  buff.resize(10);
-  ASSERT_EQ(10, buff.size());
-  buff.release();
-  ASSERT_EQ(0, buff.size());
-  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
-  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
-}
-
-TEST(Raft, DeviceToHostBuffer)
-{
-  auto d_alloc = std::make_shared<device::default_allocator>();
-  auto h_alloc = std::make_shared<default_allocator>();
-  cudaStream_t stream;
-  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
-  device::buffer<char> d_buff(d_alloc, stream, 32);
-  RAFT_CUDA_TRY(cudaMemsetAsync(d_buff.data(), 0, sizeof(char) * d_buff.size(), stream));
-  buffer<char> h_buff(h_alloc, d_buff);
-  ASSERT_EQ(d_buff.size(), h_buff.size());
-  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
-  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
-}
-
-}  // namespace host
-}  // namespace mr
-}  // namespace raft
diff --git a/cpp/test/spectral_matrix.cu b/cpp/test/spectral_matrix.cu
index fa54b04cda..652aa61451 100644
--- a/cpp/test/spectral_matrix.cu
+++ b/cpp/test/spectral_matrix.cu
@@ -19,7 +19,7 @@
 #include <memory>
 #include <raft/handle.hpp>
 
-#include <raft/spectral/matrix_wrappers.hpp>
+#include <raft/spectral/detail/matrix_wrappers.cuh>
 
 namespace raft {
 namespace {