From 2ebf89c3969d23f8f394a409909997c8d9593ae1 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 8 Feb 2022 22:19:20 -0500 Subject: [PATCH] Hiding implementation details for lap, clustering, spectral, and label (#477) Also managed to remove the raft host/device buffers in the process Authors: - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/raft/pull/477 --- .../kmeans.hpp => cluster/detail/kmeans.cuh} | 17 +- cpp/include/raft/cluster/kmeans.hpp | 65 ++++++ cpp/include/raft/comms/helper.hpp | 1 - cpp/include/raft/comms/std_comms.hpp | 2 - cpp/include/raft/label/classlabels.hpp | 117 ++++++++++ .../raft/label/{ => detail}/classlabels.cuh | 3 + .../raft/label/{ => detail}/merge_labels.cuh | 2 + cpp/include/raft/label/merge_labels.hpp | 66 ++++++ cpp/include/raft/lap/{ => detail}/d_structs.h | 0 .../raft/lap/{ => detail}/lap_functions.cuh | 2 +- .../raft/lap/{ => detail}/lap_kernels.cuh | 1 - cpp/include/raft/lap/{lap.cuh => lap.hpp} | 4 +- cpp/include/raft/linalg/detail/lanczos.hpp | 6 +- cpp/include/raft/mr/buffer_base.hpp | 211 ------------------ cpp/include/raft/mr/device/buffer.hpp | 70 ------ cpp/include/raft/mr/host/buffer.hpp | 85 ------- .../raft/sparse/distance/detail/coo_spmv.cuh | 1 - .../raft/sparse/distance/detail/utils.cuh | 1 - .../sparse/hierarchy/detail/agglomerative.cuh | 1 - .../hierarchy/detail/connectivities.cuh | 1 - .../raft/sparse/hierarchy/detail/mst.cuh | 1 - .../raft/sparse/linalg/detail/spectral.cuh | 8 +- cpp/include/raft/sparse/op/detail/reduce.cuh | 1 - .../selection/detail/connect_components.cuh | 3 +- .../raft/sparse/selection/detail/knn.cuh | 1 - cpp/include/raft/spatial/knn/ann.hpp | 2 - cpp/include/raft/spatial/knn/knn.hpp | 4 - cpp/include/raft/spectral/cluster_solvers.hpp | 28 ++- .../raft/spectral/{ => detail}/lapack.hpp | 0 .../matrix_wrappers.cuh} | 0 .../detail/modularity_maximization.hpp | 188 ++++++++++++++++ .../raft/spectral/detail/partition.hpp | 182 +++++++++++++++ .../spectral_util.cuh} | 0 .../raft/spectral/{ => detail}/warn_dbg.hpp | 0 cpp/include/raft/spectral/eigen_solvers.hpp | 3 + .../raft/spectral/modularity_maximization.hpp | 110 +-------- cpp/include/raft/spectral/partition.hpp | 94 +------- cpp/test/CMakeLists.txt | 2 - cpp/test/cluster_solvers.cu | 10 +- cpp/test/eigen_solvers.cu | 17 +- cpp/test/label/label.cu | 2 +- cpp/test/label/merge_labels.cu | 2 +- cpp/test/lap/lap.cu | 2 +- cpp/test/mr/device/buffer.cpp | 92 -------- cpp/test/mr/host/buffer.cpp | 71 ------ cpp/test/spectral_matrix.cu | 2 +- 46 files changed, 697 insertions(+), 784 deletions(-) rename cpp/include/raft/{spectral/kmeans.hpp => cluster/detail/kmeans.cuh} (99%) create mode 100644 cpp/include/raft/cluster/kmeans.hpp create mode 100644 cpp/include/raft/label/classlabels.hpp rename cpp/include/raft/label/{ => detail}/classlabels.cuh (99%) rename cpp/include/raft/label/{ => detail}/merge_labels.cuh (99%) create mode 100644 cpp/include/raft/label/merge_labels.hpp rename cpp/include/raft/lap/{ => detail}/d_structs.h (100%) rename cpp/include/raft/lap/{ => detail}/lap_functions.cuh (99%) rename cpp/include/raft/lap/{ => detail}/lap_kernels.cuh (99%) rename cpp/include/raft/lap/{lap.cuh => lap.hpp} (99%) delete mode 100644 cpp/include/raft/mr/buffer_base.hpp delete mode 100644 cpp/include/raft/mr/device/buffer.hpp delete mode 100644 cpp/include/raft/mr/host/buffer.hpp rename cpp/include/raft/spectral/{ => detail}/lapack.hpp (100%) rename cpp/include/raft/spectral/{matrix_wrappers.hpp => detail/matrix_wrappers.cuh} (100%) create mode 100644 cpp/include/raft/spectral/detail/modularity_maximization.hpp create mode 100644 cpp/include/raft/spectral/detail/partition.hpp rename cpp/include/raft/spectral/{spectral_util.hpp => detail/spectral_util.cuh} (100%) rename cpp/include/raft/spectral/{ => detail}/warn_dbg.hpp (100%) delete mode 100644 cpp/test/mr/device/buffer.cpp delete mode 100644 cpp/test/mr/host/buffer.cpp diff --git a/cpp/include/raft/spectral/kmeans.hpp b/cpp/include/raft/cluster/detail/kmeans.cuh similarity index 99% rename from cpp/include/raft/spectral/kmeans.hpp rename to cpp/include/raft/cluster/detail/kmeans.cuh index 56f4022a8c..5f1a0e137d 100644 --- a/cpp/include/raft/spectral/kmeans.hpp +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -32,13 +32,12 @@ #include #include #include -#include -#include +#include +#include -namespace { - -using namespace raft; -using namespace raft::linalg; +namespace raft { +namespace cluster { +namespace detail { // ========================================================= // Useful grid settings // ========================================================= @@ -728,10 +727,6 @@ static int updateCentroids(handle_t const& handle, return 0; } -} // namespace - -namespace raft { - // ========================================================= // k-means algorithm // ========================================================= @@ -986,4 +981,6 @@ int kmeans(handle_t const& handle, seed); } +} // namespace detail +} // namespace cluster } // namespace raft diff --git a/cpp/include/raft/cluster/kmeans.hpp b/cpp/include/raft/cluster/kmeans.hpp new file mode 100644 index 0000000000..ab0fbb04c7 --- /dev/null +++ b/cpp/include/raft/cluster/kmeans.hpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +namespace raft { +namespace cluster { + +/** + * @brief Find clusters with k-means algorithm. + * Initial centroids are chosen with k-means++ algorithm. Empty + * clusters are reinitialized by choosing new centroids with + * k-means++ algorithm. + * @tparam index_type_t the type of data used for indexing. + * @tparam value_type_t the type of data used for weights, distances. + * @param handle the raft handle. + * @param n Number of observation vectors. + * @param d Dimension of observation vectors. + * @param k Number of clusters. + * @param tol Tolerance for convergence. k-means stops when the + * change in residual divided by n is less than tol. + * @param maxiter Maximum number of k-means iterations. + * @param obs (Input, device memory, d*n entries) Observation + * matrix. Matrix is stored column-major and each column is an + * observation vector. Matrix dimensions are d x n. + * @param codes (Output, device memory, n entries) Cluster + * assignments. + * @param residual On exit, residual sum of squares (sum of squares + * of distances between observation vectors and centroids). + * @param iters on exit, number of k-means iterations. + * @param seed random seed to be used. + * @return error flag + */ +template +int kmeans(handle_t const& handle, + index_type_t n, + index_type_t d, + index_type_t k, + value_type_t tol, + index_type_t maxiter, + const value_type_t* __restrict__ obs, + index_type_t* __restrict__ codes, + value_type_t& residual, + index_type_t& iters, + unsigned long long seed = 123456) +{ + return detail::kmeans( + handle, n, d, k, tol, maxiter, obs, codes, residual, iters, seed); +} +} // namespace cluster +} // namespace raft diff --git a/cpp/include/raft/comms/helper.hpp b/cpp/include/raft/comms/helper.hpp index 09a767bea7..d83e8f4d4f 100644 --- a/cpp/include/raft/comms/helper.hpp +++ b/cpp/include/raft/comms/helper.hpp @@ -18,7 +18,6 @@ #include #include -#include #include #include diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp index f54535a88c..b4aa72d53e 100644 --- a/cpp/include/raft/comms/std_comms.hpp +++ b/cpp/include/raft/comms/std_comms.hpp @@ -21,8 +21,6 @@ #include #include -#include - #include #include #include diff --git a/cpp/include/raft/label/classlabels.hpp b/cpp/include/raft/label/classlabels.hpp new file mode 100644 index 0000000000..de9f60518d --- /dev/null +++ b/cpp/include/raft/label/classlabels.hpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace raft { +namespace label { + +/** + * Get unique class labels. + * + * The y array is assumed to store class labels. The unique values are selected + * from this array. + * + * @tparam value_t numeric type of the arrays with class labels + * @param [inout] unique output unique labels + * @param [in] y device array of labels, size [n] + * @param [in] n number of labels + * @param [in] stream cuda stream + * @returns unique device array of unique labels, unallocated on entry, + * on exit it has size + */ +template +int getUniquelabels(rmm::device_uvector& unique, value_t* y, size_t n, cudaStream_t stream) +{ + return detail::getUniquelabels(unique, y, n, stream); +} + +/** + * Assign one versus rest labels. + * + * The output labels will have values +/-1: + * y_out = (y == y_unique[idx]) ? +1 : -1; + * + * The output type currently is set to value_t, but for SVM in principle we are + * free to choose other type for y_out (it should represent +/-1, and it is used + * in floating point arithmetics). + * + * @param [in] y device array if input labels, size [n] + * @param [in] n number of labels + * @param [in] y_unique device array of unique labels, size [n_classes] + * @param [in] n_classes number of unique labels + * @param [out] y_out device array of output labels + * @param [in] idx index of unique label that should be labeled as 1 + * @param [in] stream cuda stream + */ +template +void getOvrlabels( + value_t* y, int n, value_t* y_unique, int n_classes, value_t* y_out, int idx, cudaStream_t stream) +{ + detail::getOvrlabels(y, n, y_unique, n_classes, y_out, idx, stream); +} +/** + * Maps an input array containing a series of numbers into a new array + * where numbers have been mapped to a monotonically increasing set + * of labels. This can be useful in machine learning algorithms, for instance, + * where a given set of labels is not taken from a monotonically increasing + * set. This can happen if they are filtered or if only a subset of the + * total labels are used in a dataset. This is also useful in graph algorithms + * where a set of vertices need to be labeled in a monotonically increasing + * order. + * @tparam Type the numeric type of the input and output arrays + * @tparam Lambda the type of an optional filter function, which determines + * which items in the array to map. + * @param[out] out the output monotonic array + * @param[in] in input label array + * @param[in] N number of elements in the input array + * @param[in] stream cuda stream to use + * @param[in] filter_op an optional function for specifying which values + * should have monotonically increasing labels applied to them. + * @param[in] zero_based force monotonic set to start at 0? + */ +template +void make_monotonic( + Type* out, Type* in, size_t N, cudaStream_t stream, Lambda filter_op, bool zero_based = false) +{ + detail::make_monotonic(out, in, N, stream, filter_op, zero_based); +} + +/** + * Maps an input array containing a series of numbers into a new array + * where numbers have been mapped to a monotonically increasing set + * of labels. This can be useful in machine learning algorithms, for instance, + * where a given set of labels is not taken from a monotonically increasing + * set. This can happen if they are filtered or if only a subset of the + * total labels are used in a dataset. This is also useful in graph algorithms + * where a set of vertices need to be labeled in a monotonically increasing + * order. + * @tparam Type the numeric type of the input and output arrays + * @param[out] out output label array with labels assigned monotonically + * @param[in] in input label array + * @param[in] N number of elements in the input array + * @param[in] stream cuda stream to use + * @param[in] zero_based force monotonic label set to start at 0? + */ +template +void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zero_based = false) +{ + detail::make_monotonic(out, in, N, stream, zero_based); +} +}; // namespace label +}; // end namespace raft diff --git a/cpp/include/raft/label/classlabels.cuh b/cpp/include/raft/label/detail/classlabels.cuh similarity index 99% rename from cpp/include/raft/label/classlabels.cuh rename to cpp/include/raft/label/detail/classlabels.cuh index fda4c02a1c..c805860759 100644 --- a/cpp/include/raft/label/classlabels.cuh +++ b/cpp/include/raft/label/detail/classlabels.cuh @@ -26,6 +26,7 @@ namespace raft { namespace label { +namespace detail { /** * Get unique class labels. @@ -194,5 +195,7 @@ void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zer make_monotonic( out, in, N, stream, [] __device__(Type val) { return false; }, zero_based); } + +}; // namespace detail }; // namespace label }; // end namespace raft diff --git a/cpp/include/raft/label/merge_labels.cuh b/cpp/include/raft/label/detail/merge_labels.cuh similarity index 99% rename from cpp/include/raft/label/merge_labels.cuh rename to cpp/include/raft/label/detail/merge_labels.cuh index 9cd5a29951..bf03d1c738 100644 --- a/cpp/include/raft/label/merge_labels.cuh +++ b/cpp/include/raft/label/detail/merge_labels.cuh @@ -25,6 +25,7 @@ namespace raft { namespace label { +namespace detail { /** Note: this is one possible implementation where we represent the label * equivalence graph implicitly using labels_a, labels_b and mask. @@ -153,5 +154,6 @@ void merge_labels(value_idx* labels_a, RAFT_CUDA_TRY(cudaPeekAtLastError()); } +} // namespace detail }; // namespace label }; // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/label/merge_labels.hpp b/cpp/include/raft/label/merge_labels.hpp new file mode 100644 index 0000000000..5ba8fe8a27 --- /dev/null +++ b/cpp/include/raft/label/merge_labels.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace raft { +namespace label { + +/** + * @brief Merge two labellings in-place, according to a core mask + * + * A labelling is a representation of disjoint sets (groups) where points that + * belong to the same group have the same label. It is assumed that group + * labels take values between 1 and N. labels relate to points, i.e a label i+1 + * means that you belong to the same group as the point i. + * The special value MAX_LABEL is used to mark points that are not labelled. + * + * The two label arrays A and B induce two sets of groups over points 0..N-1. + * If a point is labelled i in A and j in B and the mask is true for this + * point, then i and j are equivalent labels and their groups are merged by + * relabeling the elements of both groups to have the same label. The new label + * is the smaller one from the original labels. + * It is required that if the mask is true for a point, this point is labelled + * (i.e its label is different than the special value MAX_LABEL). + * + * One use case is finding connected components: the two input label arrays can + * represent the connected components of graphs G_A and G_B, and the output + * would be the connected components labels of G_A \union G_B. + * + * @param[inout] labels_a First input, and output label array (in-place) + * @param[in] labels_b Second input label array + * @param[in] mask Core point mask + * @param[out] R label equivalence map + * @param[in] m Working flag + * @param[in] N Number of points in the dataset + * @param[in] stream CUDA stream + */ +template +void merge_labels(value_idx* labels_a, + const value_idx* labels_b, + const bool* mask, + value_idx* R, + bool* m, + value_idx N, + cudaStream_t stream) +{ + detail::merge_labels(labels_a, labels_b, mask, R, m, N, stream); +} + +}; // namespace label +}; // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/lap/d_structs.h b/cpp/include/raft/lap/detail/d_structs.h similarity index 100% rename from cpp/include/raft/lap/d_structs.h rename to cpp/include/raft/lap/detail/d_structs.h diff --git a/cpp/include/raft/lap/lap_functions.cuh b/cpp/include/raft/lap/detail/lap_functions.cuh similarity index 99% rename from cpp/include/raft/lap/lap_functions.cuh rename to cpp/include/raft/lap/detail/lap_functions.cuh index ab4aa2df59..6c6b09e5d8 100644 --- a/cpp/include/raft/lap/lap_functions.cuh +++ b/cpp/include/raft/lap/detail/lap_functions.cuh @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/include/raft/lap/lap_kernels.cuh b/cpp/include/raft/lap/detail/lap_kernels.cuh similarity index 99% rename from cpp/include/raft/lap/lap_kernels.cuh rename to cpp/include/raft/lap/detail/lap_kernels.cuh index 328cbf3e74..b61d0bd269 100644 --- a/cpp/include/raft/lap/lap_kernels.cuh +++ b/cpp/include/raft/lap/detail/lap_kernels.cuh @@ -28,7 +28,6 @@ #include #include -#include #include diff --git a/cpp/include/raft/lap/lap.cuh b/cpp/include/raft/lap/lap.hpp similarity index 99% rename from cpp/include/raft/lap/lap.cuh rename to cpp/include/raft/lap/lap.hpp index 42b898ebff..2350ebcddf 100644 --- a/cpp/include/raft/lap/lap.cuh +++ b/cpp/include/raft/lap/lap.hpp @@ -27,8 +27,8 @@ #include #include -#include "d_structs.h" -#include "lap_functions.cuh" +#include "detail/d_structs.h" +#include "detail/lap_functions.cuh" namespace raft { namespace lap { diff --git a/cpp/include/raft/linalg/detail/lanczos.hpp b/cpp/include/raft/linalg/detail/lanczos.hpp index 3d8fde7e68..a2b7751a05 100644 --- a/cpp/include/raft/linalg/detail/lanczos.hpp +++ b/cpp/include/raft/linalg/detail/lanczos.hpp @@ -28,9 +28,9 @@ #include "cublas_wrappers.hpp" #include #include -#include -#include -#include +#include +#include +#include namespace raft { diff --git a/cpp/include/raft/mr/buffer_base.hpp b/cpp/include/raft/mr/buffer_base.hpp deleted file mode 100644 index 11724bed00..0000000000 --- a/cpp/include/raft/mr/buffer_base.hpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -#include -#include -#include - -namespace raft { -namespace mr { - -/** - * @brief Base for all RAII-based owning of temporary memory allocations. This - * class should ideally not be used by users directly, but instead via - * the child classes `device_buffer` and `host_buffer`. - * - * @tparam T data type - * @tparam AllocatorT The underly allocator object - */ -template -class buffer_base { - public: - using size_type = std::size_t; - using value_type = T; - using iterator = value_type*; - using const_iterator = const value_type*; - using reference = T&; - using const_reference = const T&; - - buffer_base() = delete; - - buffer_base(const buffer_base& other) = delete; - - buffer_base& operator=(const buffer_base& other) = delete; - - /** - * @brief Main ctor - * - * @param[in] allocator asynchronous allocator used for managing buffer life - * @param[in] stream cuda stream where this allocation operations are async - * @param[in] n size of the buffer (in number of elements) - */ - buffer_base(std::shared_ptr allocator, cudaStream_t stream, size_type n = 0) - : data_(nullptr), size_(n), capacity_(n), stream_(stream), allocator_(std::move(allocator)) - { - if (capacity_ > 0) { - data_ = - static_cast(allocator_->allocate(capacity_ * sizeof(value_type), stream_)); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_)); - } - } - - ~buffer_base() { release(); } - - value_type* data() { return data_; } - - const value_type* data() const { return data_; } - - size_type size() const { return size_; } - - void clear() { size_ = 0; } - - iterator begin() { return data_; } - - const_iterator begin() const { return data_; } - - iterator end() { return data_ + size_; } - - const_iterator end() const { return data_ + size_; } - - /** - * @brief Reserve new memory size for this buffer. - * - * It re-allocates a fresh buffer if the new requested capacity is more than - * the current one, copies the old buffer contents to this new buffer and - * removes the old one. - * - * @param[in] new_capacity new capacity (in number of elements) - * @{ - */ - void reserve(size_type new_capacity) - { - if (new_capacity > capacity_) { - auto* new_data = - static_cast(allocator_->allocate(new_capacity * sizeof(value_type), stream_)); - if (size_ > 0) { raft::copy(new_data, data_, size_, stream_); } - // Only deallocate if we have allocated a pointer - if (nullptr != data_) { - allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_); - } - data_ = new_data; - capacity_ = new_capacity; - } - } - - void reserve(size_type new_capacity, cudaStream_t stream) - { - set_stream(stream); - reserve(new_capacity); - } - /** @} */ - - /** - * @brief Resize the underlying buffer (uses `reserve` method internally) - * - * @param[in] new_size new buffer size - * @{ - */ - void resize(const size_type new_size) - { - reserve(new_size); - size_ = new_size; - } - - void resize(const size_type new_size, cudaStream_t stream) - { - set_stream(stream); - resize(new_size); - } - /** @} */ - - /** - * @brief Deletes the underlying buffer - * - * If this method is not explicitly called, it will be during the destructor - * @{ - */ - void release() - { - if (nullptr != data_) { - allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_); - } - data_ = nullptr; - capacity_ = 0; - size_ = 0; - } - - void release(cudaStream_t stream) - { - set_stream(stream); - release(); - } - /** @} */ - - /** - * @brief returns the underlying allocator used - * - * @return the allocator pointer - */ - std::shared_ptr get_allocator() const { return allocator_; } - - /** - * @brief returns the underlying stream used - * - * @return the cuda stream - */ - cudaStream_t get_stream() const { return stream_; } - - protected: - value_type* data_; - - private: - size_type size_; - size_type capacity_; - cudaStream_t stream_; - std::shared_ptr allocator_; - - /** - * @brief Sets a new cuda stream where the future operations will be queued - * - * This method makes sure that the inter-stream dependencies are met and taken - * care of, before setting the input stream as a new stream for this buffer. - * Ideally, the same cuda stream passed during constructor is expected to be - * used throughout this buffer's lifetime, for performance. - * - * @param[in] stream new cuda stream to be set. If it is the same as the - * current one, then this method will be a no-op. - */ - void set_stream(cudaStream_t stream) - { - if (stream_ != stream) { - cudaEvent_t event; - RAFT_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming)); - RAFT_CUDA_TRY(cudaEventRecord(event, stream_)); - RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0)); - stream_ = stream; - RAFT_CUDA_TRY(cudaEventDestroy(event)); - } - } -}; // class buffer_base - -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/device/buffer.hpp b/cpp/include/raft/mr/device/buffer.hpp deleted file mode 100644 index 9b5ff11c50..0000000000 --- a/cpp/include/raft/mr/device/buffer.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "allocator.hpp" -#include -#include - -namespace raft { -namespace mr { -namespace device { - -/** - * @brief RAII object owning a contiguous typed device buffer. The passed in - * allocator supports asynchronous allocation and deallocation so this - * can also be used for temporary memory - * - * @code{.cpp} - * template - * void foo(..., cudaStream_t stream) { - * ... - * raft::mr::device::buffer temp(stream, 0); - * ... - * temp.resize(n); - * kernelA<<>>(...,temp.data(),...); - * kernelB<<>>(...,temp.data(),...); - * temp.release(); - * ... - * } - * @endcode - */ -template -class buffer : public buffer_base { - public: - using size_type = typename buffer_base::size_type; - using value_type = typename buffer_base::value_type; - using iterator = typename buffer_base::iterator; - using const_iterator = typename buffer_base::const_iterator; - using reference = typename buffer_base::reference; - using const_reference = typename buffer_base::const_reference; - - buffer() = delete; - - buffer(const buffer& other) = delete; - - buffer& operator=(const buffer& other) = delete; - - buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) - : buffer_base(alloc, stream, n) - { - } -}; // class buffer - -}; // namespace device -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/host/buffer.hpp b/cpp/include/raft/mr/host/buffer.hpp deleted file mode 100644 index 204b384719..0000000000 --- a/cpp/include/raft/mr/host/buffer.hpp +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "allocator.hpp" -#include -#include -#include - -namespace raft { -namespace mr { -namespace host { - -/** - * @brief RAII object owning a contigous typed host buffer (aka pinned memory). - * The passed in allocator supports asynchronus allocation and - * deallocation so this can also be used for temporary memory - * - * @code{.cpp} - * template - * void foo(const T* in_d , T* out_d, ..., cudaStream_t stream) { - * ... - * raft::mr::host::buffer temp(stream, 0); - * ... - * temp.resize(n); - * raft::copy(temp.data(), in_d, temp.size()); - * ... - * raft::copy(out_d, temp.data(), temp.size()); - * temp.release(stream); - * ... - * } - * @endcode - */ -template -class buffer : public buffer_base { - public: - using size_type = typename buffer_base::size_type; - using value_type = typename buffer_base::value_type; - using iterator = typename buffer_base::iterator; - using const_iterator = typename buffer_base::const_iterator; - using reference = typename buffer_base::reference; - using const_reference = typename buffer_base::const_reference; - - buffer() = delete; - - buffer(const buffer& other) = delete; - - buffer& operator=(const buffer& other) = delete; - - buffer(std::shared_ptr alloc, const device::buffer& other) - : buffer_base(alloc, other.get_stream(), other.size()) - { - if (other.size() > 0) { raft::copy(data_, other.data(), other.size(), other.get_stream()); } - } - - buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) - : buffer_base(alloc, stream, n) - { - } - - reference operator[](size_type pos) { return data_[pos]; } - - const_reference operator[](size_type pos) const { return data_[pos]; } - - private: - using buffer_base::data_; -}; - -}; // namespace host -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh index 046b65a0f0..020de9e014 100644 --- a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh +++ b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh @@ -21,7 +21,6 @@ #include #include -#include #include #include "../../csr.hpp" diff --git a/cpp/include/raft/sparse/distance/detail/utils.cuh b/cpp/include/raft/sparse/distance/detail/utils.cuh index 8c01b33c1e..06c034ad9f 100644 --- a/cpp/include/raft/sparse/distance/detail/utils.cuh +++ b/cpp/include/raft/sparse/distance/detail/utils.cuh @@ -17,7 +17,6 @@ #pragma once #include -#include #include diff --git a/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh b/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh index 31ebe38d85..105f1cc9f6 100644 --- a/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh +++ b/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh @@ -19,7 +19,6 @@ #include #include #include -#include #include diff --git a/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh b/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh index bd96ca8649..fe58246545 100644 --- a/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh +++ b/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh @@ -24,7 +24,6 @@ #include #include -#include #include #include #include diff --git a/cpp/include/raft/sparse/hierarchy/detail/mst.cuh b/cpp/include/raft/sparse/hierarchy/detail/mst.cuh index 7173c76c08..10e9d04c0d 100644 --- a/cpp/include/raft/sparse/hierarchy/detail/mst.cuh +++ b/cpp/include/raft/sparse/hierarchy/detail/mst.cuh @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index 5b43798e2e..95d9c0d1c5 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -17,7 +17,8 @@ #include #include -#include +#include +#include #include #include @@ -70,11 +71,12 @@ void fit_embedding(const raft::handle_t& handle, value_type tol = 0.01; index_type restart_iter = 15 + neigvs; // what cugraph is using - raft::eigen_solver_config_t cfg{neigvs, maxiter, restart_iter, tol}; + raft::spectral::eigen_solver_config_t cfg{ + neigvs, maxiter, restart_iter, tol}; cfg.seed = seed; - raft::lanczos_solver_t eig_solver{cfg}; + raft::spectral::lanczos_solver_t eig_solver{cfg}; // cluster computation here is irrelevant, // hence define a no-op such solver to diff --git a/cpp/include/raft/sparse/op/detail/reduce.cuh b/cpp/include/raft/sparse/op/detail/reduce.cuh index e4a64fbb51..988f478f2b 100644 --- a/cpp/include/raft/sparse/op/detail/reduce.cuh +++ b/cpp/include/raft/sparse/op/detail/reduce.cuh @@ -20,7 +20,6 @@ #include #include -#include #include #include diff --git a/cpp/include/raft/sparse/selection/detail/connect_components.cuh b/cpp/include/raft/sparse/selection/detail/connect_components.cuh index afbb7f17b3..2b9ca2d8b5 100644 --- a/cpp/include/raft/sparse/selection/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/selection/detail/connect_components.cuh @@ -17,9 +17,8 @@ #include #include -#include +#include #include -#include #include #include #include diff --git a/cpp/include/raft/sparse/selection/detail/knn.cuh b/cpp/include/raft/sparse/selection/detail/knn.cuh index 3de10a2782..82a689fe00 100644 --- a/cpp/include/raft/sparse/selection/detail/knn.cuh +++ b/cpp/include/raft/sparse/selection/detail/knn.cuh @@ -23,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/cpp/include/raft/spatial/knn/ann.hpp b/cpp/include/raft/spatial/knn/ann.hpp index 6ce9463e43..5f64a8d1b5 100644 --- a/cpp/include/raft/spatial/knn/ann.hpp +++ b/cpp/include/raft/spatial/knn/ann.hpp @@ -22,8 +22,6 @@ #include #include -#include - namespace raft { namespace spatial { namespace knn { diff --git a/cpp/include/raft/spatial/knn/knn.hpp b/cpp/include/raft/spatial/knn/knn.hpp index b29c4cc51c..59df75ba36 100644 --- a/cpp/include/raft/spatial/knn/knn.hpp +++ b/cpp/include/raft/spatial/knn/knn.hpp @@ -19,14 +19,10 @@ #include "detail/knn_brute_force_faiss.cuh" #include "detail/selection_faiss.cuh" -#include - namespace raft { namespace spatial { namespace knn { -using deviceAllocator = raft::mr::device::allocator; - /** * Performs a k-select across row partitioned index/distance * matrices formatted like the following: diff --git a/cpp/include/raft/spectral/cluster_solvers.hpp b/cpp/include/raft/spectral/cluster_solvers.hpp index 221a9679d4..cc25e66cae 100644 --- a/cpp/include/raft/spectral/cluster_solvers.hpp +++ b/cpp/include/raft/spectral/cluster_solvers.hpp @@ -15,10 +15,11 @@ */ #pragma once -#include +#include #include // for std::pair namespace raft { +namespace spectral { using namespace matrix; @@ -52,17 +53,18 @@ struct kmeans_solver_t { RAFT_EXPECTS(codes != nullptr, "Null codes buffer."); value_type_t residual{}; index_type_t iters{}; - kmeans(handle, - n_obs_vecs, - dim, - config_.n_clusters, - config_.tol, - config_.maxIter, - obs, - codes, - residual, - iters, - config_.seed); + + raft::cluster::kmeans(handle, + n_obs_vecs, + dim, + config_.n_clusters, + config_.tol, + config_.maxIter, + obs, + codes, + residual, + iters, + config_.seed); return std::make_pair(residual, iters); } @@ -71,4 +73,6 @@ struct kmeans_solver_t { private: cluster_solver_config_t config_; }; + +} // namespace spectral } // namespace raft diff --git a/cpp/include/raft/spectral/lapack.hpp b/cpp/include/raft/spectral/detail/lapack.hpp similarity index 100% rename from cpp/include/raft/spectral/lapack.hpp rename to cpp/include/raft/spectral/detail/lapack.hpp diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/detail/matrix_wrappers.cuh similarity index 100% rename from cpp/include/raft/spectral/matrix_wrappers.hpp rename to cpp/include/raft/spectral/detail/matrix_wrappers.cuh diff --git a/cpp/include/raft/spectral/detail/modularity_maximization.hpp b/cpp/include/raft/spectral/detail/modularity_maximization.hpp new file mode 100644 index 0000000000..a55dfbe67f --- /dev/null +++ b/cpp/include/raft/spectral/detail/modularity_maximization.hpp @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include + +#ifdef COLLECT_TIME_STATISTICS +#include +#include +#include +#include +#include +#endif + +#ifdef COLLECT_TIME_STATISTICS +static double timer(void) +{ + struct timeval tv; + cudaDeviceSynchronize(); + gettimeofday(&tv, NULL); + return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; +} +#endif + +namespace raft { +namespace spectral { +namespace detail { + +using namespace matrix; +using namespace linalg; + +// ========================================================= +// Spectral modularity_maximization +// ========================================================= + +/** Compute partition for a weighted undirected graph. This + * partition attempts to minimize the cost function: + * Cost = \sum_i (Edges cut by ith partition)/(Vertices in ith partition) + * + * @param G Weighted graph in CSR format + * @param nClusters Number of partitions. + * @param nEigVecs Number of eigenvectors to compute. + * @param maxIter_lanczos Maximum number of Lanczos iterations. + * @param restartIter_lanczos Maximum size of Lanczos system before + * implicit restart. + * @param tol_lanczos Convergence tolerance for Lanczos method. + * @param maxIter_kmeans Maximum number of k-means iterations. + * @param tol_kmeans Convergence tolerance for k-means algorithm. + * @param clusters (Output, device memory, n entries) Cluster + * assignments. + * @param iters_lanczos On exit, number of Lanczos iterations + * performed. + * @param iters_kmeans On exit, number of k-means iterations + * performed. + * @return error flag. + */ +template +std::tuple modularity_maximization( + handle_t const& handle, + sparse_matrix_t const& csr_m, + EigenSolver const& eigen_solver, + ClusterSolver const& cluster_solver, + vertex_t* __restrict__ clusters, + weight_t* eigVals, + weight_t* eigVecs) +{ + RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer."); + RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); + RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer."); + + auto stream = handle.get_stream(); + auto cublas_h = handle.get_cublas_handle(); + + std::tuple + stats; // # iters eigen solver, cluster solver residual, # iters cluster solver + + vertex_t n = csr_m.nrows_; + + // Compute eigenvectors of Modularity Matrix + + // Initialize Modularity Matrix + modularity_matrix_t B{handle, csr_m}; + + auto eigen_config = eigen_solver.get_config(); + auto nEigVecs = eigen_config.n_eigVecs; + + // Compute eigenvectors corresponding to largest eigenvalues + std::get<0>(stats) = eigen_solver.solve_largest_eigenvectors(handle, B, eigVals, eigVecs); + + // Whiten eigenvector matrix + transform_eigen_matrix(handle, n, nEigVecs, eigVecs); + + // notice that at this point the matrix has already been transposed, so we are scaling + // columns + scale_obs(nEigVecs, n, eigVecs); + RAFT_CHECK_CUDA(stream); + + // Find partition clustering + auto pair_cluster = cluster_solver.solve(handle, n, nEigVecs, eigVecs, clusters); + + std::get<1>(stats) = pair_cluster.first; + std::get<2>(stats) = pair_cluster.second; + + return stats; +} +//=================================================== +// Analysis of graph partition +// ========================================================= + +/// Compute modularity +/** This function determines the modularity based on a graph and cluster assignments + * @param G Weighted graph in CSR format + * @param nClusters Number of clusters. + * @param clusters (Input, device memory, n entries) Cluster assignments. + * @param modularity On exit, modularity + */ +template +void analyzeModularity(handle_t const& handle, + sparse_matrix_t const& csr_m, + vertex_t nClusters, + vertex_t const* __restrict__ clusters, + weight_t& modularity) +{ + RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer."); + + vertex_t i; + vertex_t n = csr_m.nrows_; + weight_t partModularity, clustersize; + + auto cublas_h = handle.get_cublas_handle(); + auto stream = handle.get_stream(); + + // Device memory + vector_t part_i(handle, n); + vector_t Bx(handle, n); + + // Initialize cuBLAS + RAFT_CUBLAS_TRY(cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); + + // Initialize Modularity + modularity_matrix_t B{handle, csr_m}; + + // Initialize output + modularity = 0; + + // Iterate through partitions + for (i = 0; i < nClusters; ++i) { + if (!construct_indicator(handle, i, n, clustersize, partModularity, clusters, part_i, Bx, B)) { + WARNING("empty partition"); + continue; + } + + // Record results + modularity += partModularity; + } + + modularity = modularity / B.diagonal_.nrm1(); +} + +} // namespace detail +} // namespace spectral +} // namespace raft diff --git a/cpp/include/raft/spectral/detail/partition.hpp b/cpp/include/raft/spectral/detail/partition.hpp new file mode 100644 index 0000000000..b7c811d5a5 --- /dev/null +++ b/cpp/include/raft/spectral/detail/partition.hpp @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include + +namespace raft { +namespace spectral { +namespace detail { + +using namespace matrix; +using namespace linalg; + +// ========================================================= +// Spectral partitioner +// ========================================================= + +/// Compute spectral graph partition +/** Compute partition for a weighted undirected graph. This + * partition attempts to minimize the cost function: + * Cost = \sum_i (Edges cut by ith partition)/(Vertices in ith partition) + * + * @param G Weighted graph in CSR format + * @param nClusters Number of partitions. + * @param nEigVecs Number of eigenvectors to compute. + * @param maxIter_lanczos Maximum number of Lanczos iterations. + * @param restartIter_lanczos Maximum size of Lanczos system before + * implicit restart. + * @param tol_lanczos Convergence tolerance for Lanczos method. + * @param maxIter_kmeans Maximum number of k-means iterations. + * @param tol_kmeans Convergence tolerance for k-means algorithm. + * @param clusters (Output, device memory, n entries) Partition + * assignments. + * @param iters_lanczos On exit, number of Lanczos iterations + * performed. + * @param iters_kmeans On exit, number of k-means iterations + * performed. + * @return statistics: number of eigensolver iterations, . + */ +template +std::tuple partition(handle_t const& handle, + sparse_matrix_t const& csr_m, + EigenSolver const& eigen_solver, + ClusterSolver const& cluster_solver, + vertex_t* __restrict__ clusters, + weight_t* eigVals, + weight_t* eigVecs) +{ + RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer."); + RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); + RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer."); + + auto stream = handle.get_stream(); + auto cublas_h = handle.get_cublas_handle(); + + std::tuple + stats; //{iters_eig_solver,residual_cluster,iters_cluster_solver} // # iters eigen solver, + // cluster solver residual, # iters cluster solver + + vertex_t n = csr_m.nrows_; + + // ------------------------------------------------------- + // Spectral partitioner + // ------------------------------------------------------- + + // Compute eigenvectors of Laplacian + + // Initialize Laplacian + /// sparse_matrix_t A{handle, graph}; + laplacian_matrix_t L{handle, csr_m}; + + auto eigen_config = eigen_solver.get_config(); + auto nEigVecs = eigen_config.n_eigVecs; + + // Compute smallest eigenvalues and eigenvectors + std::get<0>(stats) = eigen_solver.solve_smallest_eigenvectors(handle, L, eigVals, eigVecs); + + // Whiten eigenvector matrix + transform_eigen_matrix(handle, n, nEigVecs, eigVecs); + + // Find partition clustering + auto pair_cluster = cluster_solver.solve(handle, n, nEigVecs, eigVecs, clusters); + + std::get<1>(stats) = pair_cluster.first; + std::get<2>(stats) = pair_cluster.second; + + return stats; +} + +// ========================================================= +// Analysis of graph partition +// ========================================================= + +/// Compute cost function for partition +/** This function determines the edges cut by a partition and a cost + * function: + * Cost = \sum_i (Edges cut by ith partition)/(Vertices in ith partition) + * Graph is assumed to be weighted and undirected. + * + * @param G Weighted graph in CSR format + * @param nClusters Number of partitions. + * @param clusters (Input, device memory, n entries) Partition + * assignments. + * @param edgeCut On exit, weight of edges cut by partition. + * @param cost On exit, partition cost function. + * @return error flag. + */ +template +void analyzePartition(handle_t const& handle, + sparse_matrix_t const& csr_m, + vertex_t nClusters, + const vertex_t* __restrict__ clusters, + weight_t& edgeCut, + weight_t& cost) +{ + RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer."); + + vertex_t i; + vertex_t n = csr_m.nrows_; + + auto stream = handle.get_stream(); + auto cublas_h = handle.get_cublas_handle(); + + weight_t partEdgesCut, clustersize; + + // Device memory + vector_t part_i(handle, n); + vector_t Lx(handle, n); + + // Initialize cuBLAS + RAFT_CUBLAS_TRY(cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); + + // Initialize Laplacian + /// sparse_matrix_t A{handle, graph}; + laplacian_matrix_t L{handle, csr_m}; + + // Initialize output + cost = 0; + edgeCut = 0; + + // Iterate through partitions + for (i = 0; i < nClusters; ++i) { + // Construct indicator vector for ith partition + if (!construct_indicator(handle, i, n, clustersize, partEdgesCut, clusters, part_i, Lx, L)) { + WARNING("empty partition"); + continue; + } + + // Record results + cost += partEdgesCut / clustersize; + edgeCut += partEdgesCut / 2; + } +} + +} // namespace detail +} // namespace spectral +} // namespace raft diff --git a/cpp/include/raft/spectral/spectral_util.hpp b/cpp/include/raft/spectral/detail/spectral_util.cuh similarity index 100% rename from cpp/include/raft/spectral/spectral_util.hpp rename to cpp/include/raft/spectral/detail/spectral_util.cuh diff --git a/cpp/include/raft/spectral/warn_dbg.hpp b/cpp/include/raft/spectral/detail/warn_dbg.hpp similarity index 100% rename from cpp/include/raft/spectral/warn_dbg.hpp rename to cpp/include/raft/spectral/detail/warn_dbg.hpp diff --git a/cpp/include/raft/spectral/eigen_solvers.hpp b/cpp/include/raft/spectral/eigen_solvers.hpp index 156b996586..192dc15a6b 100644 --- a/cpp/include/raft/spectral/eigen_solvers.hpp +++ b/cpp/include/raft/spectral/eigen_solvers.hpp @@ -18,6 +18,7 @@ #include namespace raft { +namespace spectral { using namespace matrix; @@ -95,4 +96,6 @@ struct lanczos_solver_t { private: eigen_solver_config_t config_; }; + +} // namespace spectral } // namespace raft diff --git a/cpp/include/raft/spectral/modularity_maximization.hpp b/cpp/include/raft/spectral/modularity_maximization.hpp index 8188a772b8..466851c74f 100644 --- a/cpp/include/raft/spectral/modularity_maximization.hpp +++ b/cpp/include/raft/spectral/modularity_maximization.hpp @@ -16,44 +16,13 @@ #pragma once -#include -#include - -#include -#include -#include -#include - #include -#include -#include -#include - -#ifdef COLLECT_TIME_STATISTICS -#include -#include -#include -#include -#include -#endif - -#ifdef COLLECT_TIME_STATISTICS -static double timer(void) -{ - struct timeval tv; - cudaDeviceSynchronize(); - gettimeofday(&tv, NULL); - return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; -} -#endif +#include namespace raft { namespace spectral { -using namespace matrix; -using namespace linalg; - // ========================================================= // Spectral modularity_maximization // ========================================================= @@ -89,44 +58,8 @@ std::tuple modularity_maximization( weight_t* eigVals, weight_t* eigVecs) { - RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer."); - RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); - RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer."); - - auto stream = handle.get_stream(); - auto cublas_h = handle.get_cublas_handle(); - - std::tuple - stats; // # iters eigen solver, cluster solver residual, # iters cluster solver - - vertex_t n = csr_m.nrows_; - - // Compute eigenvectors of Modularity Matrix - - // Initialize Modularity Matrix - modularity_matrix_t B{handle, csr_m}; - - auto eigen_config = eigen_solver.get_config(); - auto nEigVecs = eigen_config.n_eigVecs; - - // Compute eigenvectors corresponding to largest eigenvalues - std::get<0>(stats) = eigen_solver.solve_largest_eigenvectors(handle, B, eigVals, eigVecs); - - // Whiten eigenvector matrix - transform_eigen_matrix(handle, n, nEigVecs, eigVecs); - - // notice that at this point the matrix has already been transposed, so we are scaling - // columns - scale_obs(nEigVecs, n, eigVecs); - RAFT_CHECK_CUDA(stream); - - // Find partition clustering - auto pair_cluster = cluster_solver.solve(handle, n, nEigVecs, eigVecs, clusters); - - std::get<1>(stats) = pair_cluster.first; - std::get<2>(stats) = pair_cluster.second; - - return stats; + return detail::modularity_maximization( + handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); } //=================================================== // Analysis of graph partition @@ -146,42 +79,7 @@ void analyzeModularity(handle_t const& handle, vertex_t const* __restrict__ clusters, weight_t& modularity) { - RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer."); - - vertex_t i; - vertex_t n = csr_m.nrows_; - weight_t partModularity, clustersize; - - auto cublas_h = handle.get_cublas_handle(); - auto stream = handle.get_stream(); - - // Device memory - vector_t part_i(handle, n); - vector_t Bx(handle, n); - - // Initialize cuBLAS - // #TODO: Use public API when ready - RAFT_CUBLAS_TRY( - raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); - - // Initialize Modularity - modularity_matrix_t B{handle, csr_m}; - - // Initialize output - modularity = 0; - - // Iterate through partitions - for (i = 0; i < nClusters; ++i) { - if (!construct_indicator(handle, i, n, clustersize, partModularity, clusters, part_i, Bx, B)) { - WARNING("empty partition"); - continue; - } - - // Record results - modularity += partModularity; - } - - modularity = modularity / B.diagonal_.nrm1(); + detail::analyzeModularity(handle, csr_m, nClusters, clusters, modularity); } } // namespace spectral diff --git a/cpp/include/raft/spectral/partition.hpp b/cpp/include/raft/spectral/partition.hpp index 5b1478baa9..597ef530a2 100644 --- a/cpp/include/raft/spectral/partition.hpp +++ b/cpp/include/raft/spectral/partition.hpp @@ -15,26 +15,13 @@ */ #pragma once -#include -#include - -#include -#include -#include -#include - #include -#include -#include -#include +#include namespace raft { namespace spectral { -using namespace matrix; -using namespace linalg; - // ========================================================= // Spectral partitioner // ========================================================= @@ -70,45 +57,8 @@ std::tuple partition(handle_t const& handle, weight_t* eigVals, weight_t* eigVecs) { - RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer."); - RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); - RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer."); - - auto stream = handle.get_stream(); - auto cublas_h = handle.get_cublas_handle(); - - std::tuple - stats; //{iters_eig_solver,residual_cluster,iters_cluster_solver} // # iters eigen solver, - // cluster solver residual, # iters cluster solver - - vertex_t n = csr_m.nrows_; - - // ------------------------------------------------------- - // Spectral partitioner - // ------------------------------------------------------- - - // Compute eigenvectors of Laplacian - - // Initialize Laplacian - /// sparse_matrix_t A{handle, graph}; - laplacian_matrix_t L{handle, csr_m}; - - auto eigen_config = eigen_solver.get_config(); - auto nEigVecs = eigen_config.n_eigVecs; - - // Compute smallest eigenvalues and eigenvectors - std::get<0>(stats) = eigen_solver.solve_smallest_eigenvectors(handle, L, eigVals, eigVecs); - - // Whiten eigenvector matrix - transform_eigen_matrix(handle, n, nEigVecs, eigVecs); - - // Find partition clustering - auto pair_cluster = cluster_solver.solve(handle, n, nEigVecs, eigVecs, clusters); - - std::get<1>(stats) = pair_cluster.first; - std::get<2>(stats) = pair_cluster.second; - - return stats; + return detail::partition( + handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); } // ========================================================= @@ -137,43 +87,7 @@ void analyzePartition(handle_t const& handle, weight_t& edgeCut, weight_t& cost) { - RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer."); - - vertex_t i; - vertex_t n = csr_m.nrows_; - - auto stream = handle.get_stream(); - auto cublas_h = handle.get_cublas_handle(); - - weight_t partEdgesCut, clustersize; - - // Device memory - vector_t part_i(handle, n); - vector_t Lx(handle, n); - - // Initialize cuBLAS - RAFT_CUBLAS_TRY(cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); - - // Initialize Laplacian - /// sparse_matrix_t A{handle, graph}; - laplacian_matrix_t L{handle, csr_m}; - - // Initialize output - cost = 0; - edgeCut = 0; - - // Iterate through partitions - for (i = 0; i < nClusters; ++i) { - // Construct indicator vector for ith partition - if (!construct_indicator(handle, i, n, clustersize, partEdgesCut, clusters, part_i, Lx, L)) { - WARNING("empty partition"); - continue; - } - - // Record results - cost += partEdgesCut / clustersize; - edgeCut += partEdgesCut / 2; - } + detail::analyzePartition(handle, csr_m, nClusters, clusters, edgeCut, cost); } } // namespace spectral diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index a3df5c7a4b..fda60e1cb0 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -65,8 +65,6 @@ add_executable(test_raft test/matrix/math.cu test/matrix/matrix.cu test/matrix/linewise_op.cu - test/mr/device/buffer.cpp - test/mr/host/buffer.cpp test/mst.cu test/random/rng.cu test/random/rng_int.cu diff --git a/cpp/test/cluster_solvers.cu b/cpp/test/cluster_solvers.cu index 2c7996514a..0030596e21 100644 --- a/cpp/test/cluster_solvers.cu +++ b/cpp/test/cluster_solvers.cu @@ -19,9 +19,11 @@ #include #include +#include #include namespace raft { +namespace spectral { TEST(Raft, ClusterSolvers) { @@ -60,7 +62,12 @@ TEST(Raft, ModularitySolvers) using value_type = double; handle_t h; - ASSERT_EQ(0, h.get_device()); + ASSERT_EQ(0, + h. + + get_device() + + ); index_type neigvs{10}; index_type maxiter{100}; @@ -95,4 +102,5 @@ TEST(Raft, ModularitySolvers) EXPECT_ANY_THROW(spectral::analyzeModularity(h, sm, k, clusters, modularity)); } +} // namespace spectral } // namespace raft diff --git a/cpp/test/eigen_solvers.cu b/cpp/test/eigen_solvers.cu index f898d11d2e..541d4dccc8 100644 --- a/cpp/test/eigen_solvers.cu +++ b/cpp/test/eigen_solvers.cu @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -25,6 +26,7 @@ #include namespace raft { +namespace spectral { TEST(Raft, EigenSolvers) { @@ -34,7 +36,12 @@ TEST(Raft, EigenSolvers) using value_type = double; handle_t h; - ASSERT_EQ(0, h.get_device()); + ASSERT_EQ(0, + h. + + get_device() + + ); index_type* ro{nullptr}; index_type* ci{nullptr}; @@ -75,7 +82,12 @@ TEST(Raft, SpectralSolvers) using value_type = double; handle_t h; - ASSERT_EQ(0, h.get_device()); + ASSERT_EQ(0, + h. + + get_device() + + ); index_type neigvs{10}; index_type maxiter{100}; @@ -109,4 +121,5 @@ TEST(Raft, SpectralSolvers) EXPECT_ANY_THROW(spectral::analyzePartition(h, sm, k, clusters, edgeCut, cost)); } +} // namespace spectral } // namespace raft diff --git a/cpp/test/label/label.cu b/cpp/test/label/label.cu index d441bf95a8..b19accc3b4 100644 --- a/cpp/test/label/label.cu +++ b/cpp/test/label/label.cu @@ -16,7 +16,7 @@ #include -#include +#include #include "../test_utils.h" #include diff --git a/cpp/test/label/merge_labels.cu b/cpp/test/label/merge_labels.cu index 5d30af795f..db6b34bbd6 100644 --- a/cpp/test/label/merge_labels.cu +++ b/cpp/test/label/merge_labels.cu @@ -15,7 +15,7 @@ */ #include -#include +#include #include "../test_utils.h" #include diff --git a/cpp/test/lap/lap.cu b/cpp/test/lap/lap.cu index afdebae1f8..24e1c6be4f 100644 --- a/cpp/test/lap/lap.cu +++ b/cpp/test/lap/lap.cu @@ -28,7 +28,7 @@ #include #include -#include +#include #include #define PROBLEMSIZE 1000 // Number of rows/columns diff --git a/cpp/test/mr/device/buffer.cpp b/cpp/test/mr/device/buffer.cpp deleted file mode 100644 index 4861a4ca1f..0000000000 --- a/cpp/test/mr/device/buffer.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include - -namespace raft { -namespace mr { -namespace device { - -TEST(Raft, DeviceBufferAlloc) -{ - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - // no allocation at construction - rmm::device_uvector buff(0, stream); - ASSERT_EQ(0, buff.size()); - // explicit allocation after construction - buff.resize(20, stream); - ASSERT_EQ(20, buff.size()); - // resizing to a smaller buffer size - buff.resize(10, stream); - ASSERT_EQ(10, buff.size()); - // explicit deallocation - buff.release(); - ASSERT_EQ(0, buff.size()); - // use these methods without the explicit stream parameter - buff.resize(20, stream); - ASSERT_EQ(20, buff.size()); - buff.resize(10, stream); - ASSERT_EQ(10, buff.size()); - buff.release(); - ASSERT_EQ(0, buff.size()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); -} - -TEST(Raft, DeviceBufferZeroResize) -{ - // Create a limiting_resource_adaptor to track allocations - auto curr_mr = - dynamic_cast(rmm::mr::get_current_device_resource()); - auto limit_mr = - std::make_shared>(curr_mr, - 1000); - - rmm::mr::set_current_device_resource(limit_mr.get()); - - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - // no allocation at construction - rmm::device_uvector buff(10, stream); - ASSERT_EQ(10, buff.size()); - // explicit allocation after construction - buff.resize(0, stream); - ASSERT_EQ(0, buff.size()); - // resizing to a smaller buffer size - buff.resize(20, stream); - ASSERT_EQ(20, buff.size()); - // explicit deallocation - buff.release(); - ASSERT_EQ(0, buff.size()); - - // Now check that there is no memory left. (Used to not be true) - ASSERT_EQ(0, limit_mr->get_allocated_bytes()); - - rmm::mr::set_current_device_resource(curr_mr); - - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); -} - -} // namespace device -} // namespace mr -} // namespace raft diff --git a/cpp/test/mr/host/buffer.cpp b/cpp/test/mr/host/buffer.cpp deleted file mode 100644 index d645ffa0e0..0000000000 --- a/cpp/test/mr/host/buffer.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -namespace raft { -namespace mr { -namespace host { - -TEST(Raft, HostBuffer) -{ - auto alloc = std::make_shared(); - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - // no allocation at construction - buffer buff(alloc, stream); - ASSERT_EQ(0, buff.size()); - // explicit allocation after construction - buff.resize(20, stream); - ASSERT_EQ(20, buff.size()); - // resizing to a smaller buffer size - buff.resize(10, stream); - ASSERT_EQ(10, buff.size()); - // explicit deallocation - buff.release(stream); - ASSERT_EQ(0, buff.size()); - // use these methods without the explicit stream parameter - buff.resize(20); - ASSERT_EQ(20, buff.size()); - buff.resize(10); - ASSERT_EQ(10, buff.size()); - buff.release(); - ASSERT_EQ(0, buff.size()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); -} - -TEST(Raft, DeviceToHostBuffer) -{ - auto d_alloc = std::make_shared(); - auto h_alloc = std::make_shared(); - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - device::buffer d_buff(d_alloc, stream, 32); - RAFT_CUDA_TRY(cudaMemsetAsync(d_buff.data(), 0, sizeof(char) * d_buff.size(), stream)); - buffer h_buff(h_alloc, d_buff); - ASSERT_EQ(d_buff.size(), h_buff.size()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); -} - -} // namespace host -} // namespace mr -} // namespace raft diff --git a/cpp/test/spectral_matrix.cu b/cpp/test/spectral_matrix.cu index fa54b04cda..652aa61451 100644 --- a/cpp/test/spectral_matrix.cu +++ b/cpp/test/spectral_matrix.cu @@ -19,7 +19,7 @@ #include #include -#include +#include namespace raft { namespace {